MayaChemTools

   1 package Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
   2 #
   3 # File: TopologicalPharmacophoreAtomPairsFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2025 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Fingerprints::Fingerprints;
  30 use TextUtil ();
  31 use MathUtil ();
  32 use Molecule;
  33 use AtomTypes::FunctionalClassAtomTypes;
  34 
# Package variables used for inheritance and exporting...
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Inherit from Fingerprints::Fingerprints and Exporter. Both export lists are
# empty: nothing is exported by default or on request.
@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

# The 'all' tag covers both export lists and is consequently empty as well...
%EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName is a file scoped lexical; _InitializeClass sets it to the name of this
# package and it's interpolated into the error and warning messages.
my($ClassName);
_InitializeClass();

# Overload Perl functions...
#
# Stringification of an object is routed to the named method.
# NOTE(review): The method isn't defined in the part of the file reviewed here -
# confirm that it exists further down.
use overload '""' => 'StringifyTopologicalPharmacophoreAtomPairsFingerprints';
  49 
  50 # Class constructor...
  51 sub new {
  52   my($Class, %NamesAndValues) = @_;
  53 
  54   # Initialize object...
  55   my $This = $Class->SUPER::new();
  56   bless $This, ref($Class) || $Class;
  57   $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprints();
  58 
  59   $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties(%NamesAndValues);
  60 
  61   return $This;
  62 }
  63 
  64 # Initialize object data...
  65 #
  66 sub _InitializeTopologicalPharmacophoreAtomPairsFingerprints {
  67   my($This) = @_;
  68 
  69   # Type of fingerprint...
  70   $This->{Type} = 'TopologicalPharmacophoreAtomPairs';
  71 
  72   # Type of vector...
  73   $This->{VectorType} = 'FingerprintsVector';
  74 
  75   # AtomPairsSetSizeToUse...
  76   #
  77   # ArbitrarySize - Corrresponds to atom pairs with non-zero count
  78   # FixedSize - Corresponds to all atom pairs with zero and non-zero count
  79   #
  80   # Possible values: ArbitrarySize or FixedSize. Default: ArbitrarySize
  81   #
  82   $This->{AtomPairsSetSizeToUse} = '';
  83 
  84   # Type of FingerprintsVector...
  85   #
  86   # OrderedNumericalValues - For ArbitrarySize value of AtomPairsSetSizeToUse
  87   # NumericalValues - For FixedSize value of AtomPairsSetSizeToUse
  88   #
  89   # Possible values: OrderedNumericalValues or NumericalValues. Default: NumericalValues
  90   #
  91   $This->{FingerprintsVectorType} = '';
  92 
  93   # Vector values precision for real values which might be generated after
  94   # normalization and fuzzification...
  95   $This->{ValuesPrecision} = 2;
  96 
  97   # Minimum and maximum bond distance between pharmacophore atom paris...
  98   $This->{MinDistance} = 1;
  99   $This->{MaxDistance} = 10;
 100 
 101   # Initialize atom types and weight information...
 102   $This->_InitializePharmacophoreAtomTypesAndWeightInformation();
 103 
 104   # Normalization methodology to use for scaling the occurance count of pharmacophore atom
 105   # pairs at various distances.
 106   #
 107   # Possible values: None, ByHeavyAtomsCount, ByAtomTypesCount. Default: None
 108   #
 109   $This->{NormalizationMethodology} = 'None';
 110 
 111   # Initialize fuzzification parameters...
 112   #
 113   $This->_InitializeFuzzificationInformation();
 114 
 115   # Pharmacophore types assigned to each heavy atom...
 116   #
 117   %{$This->{AssignedAtomTypes}} = ();
 118 
 119   # Assigned Atom types count of each type in the molecule...
 120   #
 121   %{$This->{AssignedAtomTypesCount}} = ();
 122 
 123   # All pharmacophore atom pairs between minimum and maximum distance...
 124   #
 125   @{$This->{AtomPairsIDs}} = ();
 126   %{$This->{AtomPairsCount}} = ();
 127 }
 128 
 129 # Inialize pharmacophore atom types and weight information...
 130 #
 131 sub _InitializePharmacophoreAtomTypesAndWeightInformation {
 132   my($This) = @_;
 133 
 134   # Default pharmacophore atom types to use for atom pairs fingerprint generation
 135   # are: HBD, HBA, PI, NI, H
 136   #
 137   @{$This->{AtomTypesToUse}} = ();
 138   @{$This->{AtomTypesToUse}} = sort ('HBD', 'HBA', 'PI', 'NI', 'H');
 139 
 140   # Weight of the various pharmacophore atom types to use for their contribution to atom
 141   # pair interaction. It allows to increase the importance of specific pharmacophore atom
 142   # types in the generted fingerprints.
 143   #
 144   # A value of 0 eliminates the contribution by a particular pharmacophore atom
 145   # type and 2 doubles its contribution.
 146   #
 147   my($AtomType, %AvailableAtomTypes);
 148 
 149   %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 150 
 151   %{$This->{AtomTypesWeight}} = ();
 152   for $AtomType (keys %AvailableAtomTypes) {
 153     $This->{AtomTypesWeight}{$AtomType} = 1;
 154   }
 155   return $This;
 156 }
 157 
 158 # Initialize fuzzification information...
 159 #
 160 sub _InitializeFuzzificationInformation {
 161   my($This) = @_;
 162 
 163   # To fuzz or not to fuzz atom pairs count. Default: No fuzzication
 164   #
 165   $This->{FuzzifyAtomPairsCount} = 0;
 166 
 167   # When to fuzz atom pair count...
 168   #
 169   # Possible values: BeforeNormalization or AfterNormalization. Default: AfterNormalization
 170   #
 171   $This->{FuzzificationMode} = 'AfterNormalization';
 172 
 173   # How to fuzz atom pair count...
 174   #
 175   # Possible values: FuzzyBinning or FuzzyBinSmoothing. Default: FuzzyBinning
 176   #
 177   $This->{FuzzificationMethodology} = 'FuzzyBinning';
 178 
 179   # By how much to fuzz atom pairs count...
 180   #
 181   $This->{FuzzFactor} = 0.15;
 182 
 183   return $This;
 184 }
 185 
 186 # Initialize class ...
 187 sub _InitializeClass {
 188   #Class name...
 189   $ClassName = __PACKAGE__;
 190 }
 191 
 192 # Initialize object properties....
 193 sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties {
 194   my($This, %NamesAndValues) = @_;
 195 
 196   my($Name, $Value, $MethodName);
 197   while (($Name, $Value) = each  %NamesAndValues) {
 198     $MethodName = "Set${Name}";
 199     $This->$MethodName($Value);
 200   }
 201 
 202   # Make sure molecule object was specified...
 203   if (!exists $NamesAndValues{Molecule}) {
 204     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 205   }
 206 
 207   $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector();
 208 
 209   return $This;
 210 }
 211 
 212 # Initialize fingerprints vector...
 213 #
 214 sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector {
 215   my($This) = @_;
 216 
 217   if (!$This->{AtomPairsSetSizeToUse}) {
 218     $This->{AtomPairsSetSizeToUse} =  'ArbitrarySize';
 219   }
 220 
 221   # Vector type and type of values...
 222   $This->{VectorType} = 'FingerprintsVector';
 223 
 224   if ($This->{AtomPairsSetSizeToUse} =~ /^FixedSize$/i) {
 225     $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
 226   }
 227   else {
 228     $This->{FingerprintsVectorType} = 'NumericalValues';
 229   }
 230 
 231   $This->_InitializeFingerprintsVector();
 232 }
 233 
 234 # Set atom parits set size to use...
 235 #
 236 sub SetAtomPairsSetSizeToUse {
 237   my($This, $Value) = @_;
 238 
 239   if ($This->{AtomPairsSetSizeToUse}) {
 240     croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Can't change size:  It's already set...";
 241   }
 242 
 243   if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
 244     croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Unknown AtomPairsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
 245   }
 246 
 247   $This->{AtomPairsSetSizeToUse} = $Value;
 248 
 249   return $This;
 250 }
 251 
 252 # Disable change of AvailableAtomTypes...
 253 #
 254 sub SetAvailableAtomTypes {
 255   my($This) = @_;
 256 
 257   carp "Warning: ${ClassName}->SetAvailableAtomTypes: AvailableAtomTypes value can't be set...";
 258 
 259   return $This;
 260 }
 261 
 262 # Set atom types to use for atom pairs...
 263 #
 264 sub SetAtomTypesToUse {
 265   my($This, @Values) = @_;
 266   my($FirstValue, $TypeOfFirstValue, $AtomType, $SpecifiedAtomType, @SpecifiedAtomTypes, @AtomTypesToUse);
 267 
 268   if (!@Values) {
 269     carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
 270     return;
 271   }
 272 
 273   $FirstValue = $Values[0];
 274   $TypeOfFirstValue = ref $FirstValue;
 275 
 276   @SpecifiedAtomTypes = ();
 277   @AtomTypesToUse = ();
 278 
 279   if ($TypeOfFirstValue =~ /^ARRAY/) {
 280     push @SpecifiedAtomTypes, @{$FirstValue};
 281   }
 282   else {
 283     push @SpecifiedAtomTypes, @Values;
 284   }
 285 
 286   # Make sure specified AtomTypes are valid...
 287   for $SpecifiedAtomType (@SpecifiedAtomTypes) {
 288     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedAtomType)) {
 289       croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $SpecifiedAtomType, is not supported...\n ";
 290     }
 291     $AtomType = $SpecifiedAtomType;
 292     push @AtomTypesToUse, $AtomType;
 293   }
 294 
 295   # Set atom types to use...
 296   @{$This->{AtomTypesToUse}} = ();
 297   push @{$This->{AtomTypesToUse}}, sort @AtomTypesToUse;
 298 
 299   return $This;
 300 }
 301 
 302 # Set vector values precision for real values which might be generated after
 303 # normalization and fuzzification...
 304 #
 305 sub SetValuesPrecision {
 306   my($This, $Value) = @_;
 307 
 308   if (!TextUtil::IsPositiveInteger($Value)) {
 309     croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
 310   }
 311   $This->{ValuesPrecision} = $Value;
 312 
 313   return $This;
 314 }
 315 
 316 # Set minimum distance for pharmacophore atom pairs...
 317 #
 318 sub SetMinDistance {
 319   my($This, $Value) = @_;
 320 
 321   if (!TextUtil::IsInteger($Value)) {
 322     croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be an integer...";
 323   }
 324   $This->{MinDistance} = $Value;
 325 
 326   return $This;
 327 }
 328 
 329 # Set maximum distance for pharmacophore atom pairs...
 330 #
 331 sub SetMaxDistance {
 332   my($This, $Value) = @_;
 333 
 334   if (!TextUtil::IsPositiveInteger($Value)) {
 335     croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
 336   }
 337   $This->{MaxDistance} = $Value;
 338 
 339   return $This;
 340 }
 341 
 342 # Set normalization methodology to use for scaling the occurance count of pharmacophore atom
 343 # pairs over distance range beween minimum and maximum distance.
 344 #
 345 sub SetNormalizationMethodology {
 346   my($This, $Value) = @_;
 347 
 348   if ($Value !~ /^(ByHeavyAtomsCount|ByAtomTypesCount|None)$/i) {
 349     croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByAtomTypesCount...";
 350   }
 351 
 352   $This->{NormalizationMethodology} = $Value;
 353 
 354   return $This;
 355 }
 356 
 357 # Set weight of the various pharmacophore atom types to use for their contribution to atom
 358 # pair interaction using atom types label and value hash.
 359 #
 360 # It allows to increase the importance of specific pharmacophore atom
 361 # types in the generted fingerprints.
 362 #
 363 # A value of 0 eliminates the contribution by a particular pharmacophore atom
 364 # type and 2 doubles its contribution.
 365 #
 366 sub SetAtomTypesWeight {
 367   my($This, %AtomTypesWeight) = @_;
 368   my($AtomType, $Weight);
 369 
 370   while (($AtomType, $Weight) = each %AtomTypesWeight) {
 371     if (!exists $This->{AtomTypesWeight}{$AtomType}) {
 372       croak "Error: ${ClassName}->SetAtomTypesWeight: AtomTypeWeight for $AtomType couldn't be set: Unknown atom type...";
 373     }
 374     if (!(TextUtil::IsFloat($Weight) && ($Weight >= 0))) {
 375       croak "Error: ${ClassName}->SetAtomTypesWeight: Specified weight value, $Weight, for AtomType, $AtomType, muts be >= 0...";
 376     }
 377     $This->{AtomTypesWeight}{$AtomType}  = $Weight;
 378   }
 379 }
 380 
 381 # Set fuzzification methodology to use for fuzzifying atom pairs count...
 382 #
 383 sub SetFuzzificationMethodology {
 384   my($This, $Value) = @_;
 385 
 386   if ($Value !~ /^(FuzzyBinning|FuzzyBinSmoothing)$/i) {
 387     croak "Error: ${ClassName}->SetFuzzificationMethodology: FuzzificationMethodology value, $Value, is not valid. Supported values: FuzzyBinning or FuzzyBinSmoothing...";
 388   }
 389 
 390   $This->{FuzzificationMethodology} = $Value;
 391 
 392   return $This;
 393 }
 394 
 395 # Set fuzzification mode for fuzzifying atom pairs count...
 396 #
 397 sub SetFuzzificationMode {
 398   my($This, $Value) = @_;
 399 
 400   if ($Value !~ /^(BeforeNormalization|AfterNormalization)$/i) {
 401     croak "Error: ${ClassName}->SetFuzzificationMode: FuzzificationMode value, $Value, is not valid. Supported values: BeforeNormalization or AfterNormalization...";
 402   }
 403 
 404   $This->{FuzzificationMode} = $Value;
 405 
 406   return $This;
 407 }
 408 
 409 # Set fuzz factor values used for fuzzifying atom pairs count...
 410 #
 411 sub SetFuzzFactor {
 412   my($This, $Value) = @_;
 413 
 414   if ($This->{FuzzificationMethodology} =~ /^FuzzyBinning$/i) {
 415     if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 1.0)) {
 416       croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 1...";
 417     }
 418   }
 419   elsif ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) {
 420     if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 0.5)) {
 421       croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 0.5...";
 422     }
 423   }
 424   else {
 425     croak "Error: ${ClassName}->SetFuzzFactor: Fuzz factor value can't be changed: Uknown FuzzificationMethodology: $This->{FuzzificationMethodology}...";
 426   }
 427 
 428   $This->{FuzzFactor} = $Value;
 429 
 430   return $This;
 431 }
 432 
 433 # Generate fingerprints description...
 434 #
 435 sub GetDescription {
 436   my($This) = @_;
 437 
 438   # Is description explicity set?
 439   if (exists $This->{Description}) {
 440     return $This->{Description};
 441   }
 442 
 443   # Generate fingerprints description...
 444 
 445   return "$This->{Type}:$This->{AtomPairsSetSizeToUse}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
 446 }
 447 
 448 # Generate topological pharmacophore atom pairs [ Ref 60-62, Ref 65, Ref 68 ] fingerprints...
 449 #
 450 # Methodology:
 451 #   . Generate a distance matrix.
 452 #   . Assign pharmacophore atom types to all the atoms.
 453 #   . Initialize pharmacophore atom pairs basis set for all unique pairs between
 454 #     minimum and maximum distance.
 455 #   . Using distance matrix and pharmacophore atom types, count occurance of
 456 #     unique atom pairs between specified distance range - It corresponds to the
 457 #     correlation-vector for the atom pairs.
 458 #       . Weigh contribution of each atom type to atom pair interaction by its specified
 459 #         weight during occurance count.
 460 #       . Assign count to appropriate distance bin for a specific atom pair
 461 #
 462 #   . Normalize occurance count of pharmacophore atom pairs by heavy atom count
 463 #     or sum of AtomTypeCounts of each pharmacophore atom type in the atom pair
 464 #     at a specific distance.
 465 #
#   . Fuzzify occurrence count of pharmacophore atom pairs using FuzzyBinning or
#     FuzzyBinSmoothing methodology.
 468 #
 469 # Notes:
 470 #   . Hydrogen atoms are ignored during the fingerprint generation.
 471 #
 472 sub GenerateFingerprints {
 473   my($This) = @_;
 474 
 475   if ($This->{MinDistance} > $This->{MaxDistance}) {
 476     croak "Error: ${ClassName}->GenerateTopologicalPharmacophoreAtomPairsFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
 477   }
 478 
 479   # Cache appropriate molecule data...
 480   $This->_SetupMoleculeDataCache();
 481 
 482   # Generate distance matrix...
 483   if (!$This->_SetupDistanceMatrix()) {
 484     carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";
 485     return $This;
 486   }
 487 
 488   # Assign pharmacohore atom types to all heavy atoms...
 489   $This->_AssignPharmacophoreAtomTypes();
 490 
 491   # Initialize values of all possible pharmacohore atom pairs...
 492   $This->_InitializePharmacophoreAtomPairs();
 493 
 494   # Count atom pairs...
 495   $This->_CountPharmacohoreAtomPairs();
 496 
 497   # Fuzzify atom pairs count...
 498   if ($This->{FuzzificationMode} =~ /^BeforeNormalization$/i) {
 499     $This->_FuzzifyPharmacohoreAtomPairsCount();
 500   }
 501 
 502   # Normalize atom pairs count...
 503   $This->_NormalizePharmacohoreAtomPairsCount();
 504 
 505   # Fuzzify atom pairs count...
 506   if ($This->{FuzzificationMode} =~ /^AfterNormalization$/i) {
 507     $This->_FuzzifyPharmacohoreAtomPairsCount();
 508   }
 509 
 510   # Set final fingerprints...
 511   $This->_SetFinalFingerprints();
 512 
 513   # Clear cached molecule data...
 514   $This->_ClearMoleculeDataCache();
 515 
 516   return $This;
 517 }
 518 
 519 # Setup distance matrix...
 520 #
 521 sub _SetupDistanceMatrix {
 522   my($This) = @_;
 523 
 524   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 525 
 526   if (!$This->{DistanceMatrix}) {
 527     return undef;
 528   }
 529 
 530   return $This;
 531 }
 532 
 533 # Assign pharmacohore atom types to all heavy atoms and count each atom
 534 # types assigned...
 535 #
 536 sub _AssignPharmacophoreAtomTypes {
 537   my($This) = @_;
 538   my($Atom, $AtomID, $AtomType, $AssignedAtomType, $FunctionalClassAtomTypes);
 539 
 540   # Assign topological pharmacophore atom types...
 541   $FunctionalClassAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1, 'FunctionalClassesToUse' => $This->{AtomTypesToUse});
 542   $FunctionalClassAtomTypes->AssignAtomTypes();
 543 
 544   %{$This->{AssignedAtomTypes}} = ();
 545 
 546   # Initialize assigned atom types count...
 547   %{$This->{AssignedAtomTypesCount}} = ();
 548   for $AtomType (@{$This->{AtomTypesToUse}}) {
 549     $This->{AssignedAtomTypesCount}{$AtomType} = 0;
 550   }
 551 
 552   $This->{HeavyAtomCount} = 0;
 553 
 554   ATOM: for $Atom (@{$This->{Atoms}}) {
 555     if ($Atom->IsHydrogen()) {
 556       next ATOM;
 557     }
 558     $This->{HeavyAtomCount} += 1;
 559 
 560     $AtomID = $Atom->GetID();
 561 
 562     # Collect all possible pharmacophore atom types which could be assigned to atom...
 563     my(@AtomTypes);
 564 
 565     @AtomTypes = ();
 566     $AssignedAtomType = $FunctionalClassAtomTypes->GetAtomType($Atom);
 567     if ($AssignedAtomType && $AssignedAtomType !~ /^None$/i) {
 568       push @AtomTypes, split /\./, $AssignedAtomType;
 569       for $AtomType (@AtomTypes) {
 570         $This->{AssignedAtomTypesCount}{$AtomType} += 1;
 571       }
 572     }
 573 
 574     # Assign phramacophore types to atom...
 575     $AtomID = $Atom->GetID();
 576     $This->{AssignedAtomTypes}{$AtomID} = \@AtomTypes;
 577   }
 578   return $This;
 579 }
 580 
 581 # Initialize values of all possible pharmacohore atom pairs...
 582 #
 583 # Let:
#   Dmin = Minimum distance corresponding to number of bonds between two atoms
#   Dmax = Maximum distance corresponding to number of bonds between two atoms
#   D = Distance corresponding to number of bonds between two atoms
 587 #
 588 #   P = Number of pharmacophore atom types to consider
 589 #   PPDn = Number of possible unique pharmacophore atom pairs at a distance Dn
 590 #
 591 #   PPT = Total number of possible pharmacophore atom pairs at all distances between Dmin and Dmax
 592 #
 593 # Then:
 594 #
 595 #   PPD =  (P * (P - 1))/2 + P
 596 #
 597 #   PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
 598 #       = ((Dmax - Dmin) + 1) * PPD
 599 #
 600 #
 601 # So for default values of Dmin = 1, Dmax = 10 and P = 5,
 602 #
 603 #   PPD =  (5 * (5 - 1))/2 + 5 = 15
 604 #   PPT = ((10 - 1) + 1) * 15 = 150
 605 #
# the pharmacophore atom pairs basis set includes 150 values.
 607 #
 608 sub _InitializePharmacophoreAtomPairs {
 609   my($This) = @_;
 610   my($Distance, $Index1, $Index2, $AtomType1, $AtomType2);
 611 
 612   %{$This->{AtomPairsCount}} = ();
 613 
 614   for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
 615     %{$This->{AtomPairsCount}{$Distance}} = ();
 616 
 617     for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
 618       $AtomType1 = $This->{AtomTypesToUse}[$Index1];
 619       %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} = ();
 620 
 621       for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
 622         $AtomType2 = $This->{AtomTypesToUse}[$Index2];
 623         $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} = 0;
 624       }
 625     }
 626   }
 627   return $This;
 628 }
 629 
# Count pharmacophore atom pairs between minimum and maximum distance at each
# distance using distance matrix and pharmacophore atom types assigned to each heavy
# atom.
 633 #
 634 # Let:
 635 #   Px = Pharmacophore atom type x
 636 #   Py = Pharmacophore atom type y
 637 #   Dn = Distance between Px and Py in specified distance range
 638 #
 639 # Then:
 640 #   Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
 641 #
 642 # For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
 643 #
 644 # Notes:
 645 #   . The row and column indices of distance matrix correspond to atom indices.
 646 #   . Distance value of BigNumber implies the atom is not connected to any other atom.
 647 #   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
 648 #     needs to be processed during identification and count of pharmacophore atom pairs.
 649 #
 650 sub _CountPharmacohoreAtomPairs {
 651   my($This) = @_;
 652   my($NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $DistanceMatrix, $Distance, $AtomID1, $AtomID2, $AtomType1, $AtomType2, $SkipIndexCheck, $CountIncrement);
 653 
 654   $DistanceMatrix = $This->{DistanceMatrix};
 655   ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
 656   $SkipIndexCheck = 0;
 657 
 658   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 659     $AtomID1 = $This->{AtomIndexToID}{$RowIndex};
 660     if ( !((exists($This->{AssignedAtomTypes}{$AtomID1}) && @{$This->{AssignedAtomTypes}{$AtomID1}})) ) {
 661       next ROWINDEX;
 662     }
 663 
 664     COLINDEX: for $ColIndex ($RowIndex .. ($NumOfCols - 1) ) {
 665       $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
 666       if ( !((exists($This->{AssignedAtomTypes}{$AtomID2}) && @{$This->{AssignedAtomTypes}{$AtomID2}})) ) {
 667         next COLINDEX;
 668       }
 669 
 670       $Distance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
 671       if ($Distance < $This->{MinDistance} || $Distance > $This->{MaxDistance}) {
 672         next COLINDEX;
 673       }
 674 
 675       ATOMTYPE1: for $AtomType1 (@{$This->{AssignedAtomTypes}{$AtomID1}}) {
 676         if ($This->{AtomTypesWeight}{$AtomType1} == 0) {
 677           next ATOMTYPE1;
 678         }
 679         ATOMTYPE2: for $AtomType2 (@{$This->{AssignedAtomTypes}{$AtomID2}}) {
 680           if ($This->{AtomTypesWeight}{$AtomType2} == 0) {
 681             next ATOMTYPE2;
 682           }
 683           $CountIncrement = $This->{AtomTypesWeight}{$AtomType1} * $This->{AtomTypesWeight}{$AtomType2};
 684           if ($AtomType1 le $AtomType2) {
 685             $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += $CountIncrement;
 686           }
 687           else {
 688             $This->{AtomPairsCount}{$Distance}{$AtomType2}{$AtomType1} += $CountIncrement;
 689           }
 690         }
 691       }
 692     }
 693   }
 694   return $This;
 695 }
 696 
 697 # Normalize the occurance count of pharmacophore atom pairs over the specified distance
 698 # range...
 699 #
 700 sub _NormalizePharmacohoreAtomPairsCount {
 701   my($This) = @_;
 702 
 703   METHODOLOGY: {
 704     if ($This->{NormalizationMethodology} =~ /^None$/i) {
 705       last METHODOLOGY;
 706     }
 707     if ($This->{NormalizationMethodology} =~ /^ByHeavyAtomsCount$/i) {
 708       $This->_NormalizeAtomPairsCountByHeavyAtomsCount();
 709       last METHODOLOGY;
 710     }
 711     if ($This->{NormalizationMethodology} =~ /^ByAtomTypesCount$/i) {
 712       $This->_NormalizeAtomPairsCountByAtomTypesCount();
 713       last METHODOLOGY;
 714     }
 715     croak "Error: ${ClassName}->_NormalizePharmacohoreAtomPairsCount: Unknown NormalizationMethodology: $This->{NormalizationMethodology}...";
 716   }
 717   return $This;
 718 }
 719 
 720 
 721 # Normalize the occurance count of pharmacophore atom pairs at various distances by
 722 # heavy atom count...
 723 #
 724 sub _NormalizeAtomPairsCountByHeavyAtomsCount {
 725   my($This) = @_;
 726   my($Distance, $AtomType1, $AtomType2);
 727 
 728   if ($This->{HeavyAtomCount} == 0) {
 729     return $This;
 730   }
 731 
 732   for $Distance (keys %{$This->{AtomPairsCount}} ) {
 733     for $AtomType1 (keys %{$This->{AtomPairsCount}{$Distance}} ) {
 734       ATOMTYPE2: for $AtomType2 (keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
 735         if ($This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} == 0) {
 736           next ATOMTYPE2;
 737         }
 738         $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} /= $This->{HeavyAtomCount};
 739       }
 740     }
 741   }
 742   return $This;
 743 }
 744 
 745 # Normalize the occurance count of pharmacophore atom pairs at various distances by
 746 # dividing it using sum of the count of each pharmacophore atom type present in the
 747 # molecule for the corresponding atom pair.
 748 #
 749 sub _NormalizeAtomPairsCountByAtomTypesCount {
 750   my($This) = @_;
 751   my($Distance, $AtomType1, $AtomType2, $AtomType1Count, $AtomType2Count, $NormalizationFactor);
 752 
 753   for $Distance (keys %{$This->{AtomPairsCount}} ) {
 754     for $AtomType1 (keys %{$This->{AtomPairsCount}{$Distance}} ) {
 755       ATOMTYPE2: for $AtomType2 (keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
 756         if ($This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} == 0) {
 757           next ATOMTYPE2;
 758         }
 759         $NormalizationFactor = $This->{AssignedAtomTypesCount}{$AtomType1} + $This->{AssignedAtomTypesCount}{$AtomType2};
 760         $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} /= $NormalizationFactor;
 761       }
 762     }
 763   }
 764   return $This;
 765 }
 766 
 767 # Fuzzify pharmacophore atom pairs count...
 768 #
 769 # Let:
 770 #   Px = Pharmacophore atom type x
 771 #   Py = Pharmacophore atom type y
 772 #
 773 #   PPxy = Pharmacophore atom pair between atom type Px and Py
 774 #
 775 #   PPxyDn = Pharmacophore atom pairs count between atom type Px and Py at distance Dn
 776 #   PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn - 1
 777 #   PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn + 1
 778 #
 779 #   FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
 780 #
 781 # Then:
 782 #
 783 # For FuzzyBinning:
 784 #
 785 #   PPxyDn = PPxyDn (Unchanged)
 786 #
 787 #   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
 788 #   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
 789 #
 790 # For FuzzyBinSmoothing:
 791 #
 792 #   PPxyDn = PPxyDn - PPxyDn * 2FF for Dmin < Dn < Dmax
 793 #   PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
 794 #
 795 #   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
 796 #   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
 797 #
# In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurrence counts.
# A value of 1 during FuzzyBinning corresponds to maximum fuzzification of occurrence counts;
# however, a value of 0.5 - the maximum allowed value during FuzzyBinSmoothing - ends up completely
# distributing the value over the previous and next distance bins.
 802 #
# So for default value of FuzzFactor (FF) 0.15, the occurrence count of pharmacophore atom pairs
# at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn - 1 and Dn + 1
# are incremented by PPxyDn * 0.15.
#
# And during FuzzyBinSmoothing the occurrence count at distance Dn is scaled back using a multiplicative
# factor of (1 - 2*0.15) and the occurrence counts at distances Dn - 1 and Dn + 1 are incremented by
# PPxyDn * 0.15. In other words, occurrence bin count is smoothed out by distributing it over the
# previous and next distance value.
#
 811 #
 812 sub _FuzzifyPharmacohoreAtomPairsCount {
 813   my($This) = @_;
 814   my($Index1, $Index2, $AtomType1, $AtomType2, $CurrentDistance, $CurrentCount, $NextDistance, $NextCount, $PreviousDistance, $ModifyCurrentCount, $ChangeInCountValue);
 815 
 816   if (!($This->{FuzzifyAtomPairsCount} && $This->{FuzzFactor} > 0)) {
 817     return $This;
 818   }
 819 
 820   $ModifyCurrentCount = ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) ? 1 : 0;
 821 
 822   for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
 823     $AtomType1 = $This->{AtomTypesToUse}[$Index1];
 824     for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
 825       $AtomType2 = $This->{AtomTypesToUse}[$Index2];
 826 
 827       $CurrentCount = 0; $NextCount = 0;
 828 
 829       $NextDistance = $This->{MinDistance};
 830       $NextCount = $This->{AtomPairsCount}{$NextDistance}{$AtomType1}{$AtomType2};
 831 
 832       DISTANCE: for $CurrentDistance ($This->{MinDistance} .. $This->{MaxDistance}) {
 833         $NextDistance = $CurrentDistance + 1;
 834         $PreviousDistance = $CurrentDistance - 1;
 835 
 836         $CurrentCount = $NextCount;
 837         $NextCount = ($CurrentDistance < $This->{MaxDistance}) ? $This->{AtomPairsCount}{$NextDistance}{$AtomType1}{$AtomType2} : 0;
 838 
 839         if ($CurrentCount == 0) {
 840           # No contribution to fuzzy binning from this distance...
 841           next DISTANCE;
 842         }
 843 
 844         $ChangeInCountValue = $CurrentCount * $This->{FuzzFactor};
 845 
 846         if ($CurrentDistance > $This->{MinDistance}) {
 847           # Increment count at previous distance...
 848           $This->{AtomPairsCount}{$PreviousDistance}{$AtomType1}{$AtomType2} += $ChangeInCountValue;
 849         }
 850 
 851         if ($ModifyCurrentCount) {
 852           # Decrement count at current distance for FuzzyBinSmoothing...
 853           if ($CurrentDistance > $This->{MinDistance} && $CurrentDistance < $This->{MaxDistance}) {
 854             $This->{AtomPairsCount}{$CurrentDistance}{$AtomType1}{$AtomType2} -= 2 * $ChangeInCountValue;
 855           }
 856           else {
 857             $This->{AtomPairsCount}{$CurrentDistance}{$AtomType1}{$AtomType2} -= $ChangeInCountValue;
 858           }
 859         }
 860 
 861         if ($CurrentDistance < $This->{MaxDistance}) {
 862           # Increment count at next distance...
 863           $This->{AtomPairsCount}{$NextDistance}{$AtomType1}{$AtomType2} += $ChangeInCountValue;
 864         }
 865       }
 866     }
 867   }
 868   return $This;
 869 }
 870 
 871 # Set final fingerpritns vector...
 872 #
 873 sub _SetFinalFingerprints {
 874   my($This) = @_;
 875   my($Distance, $Index1, $Index2, $AtomType1, $AtomType2, $Value, $RoundOffValues, $ValuesPrecision, $UseArbitrarySetSize, @Values);
 876 
 877   # Mark successful generation of fingerprints...
 878   $This->{FingerprintsGenerated} = 1;
 879 
 880   @Values = ();
 881   @{$This->{AtomPairsIDs}} = ();
 882 
 883   # Do values need to be rounded off?
 884   $RoundOffValues = (($This->{NormalizationMethodology} !~ /^None$/i) || ($This->{FuzzifyAtomPairsCount})) ? 1 : 0;
 885   $ValuesPrecision = $This->{ValuesPrecision};
 886 
 887   # Is it an ArbitraySize atom pairs set size?
 888   $UseArbitrarySetSize = $This->{AtomPairsSetSizeToUse} =~ /^ArbitrarySize$/i ? 1 : 0;
 889 
 890   # Collect all atom paris count values...
 891   for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
 892     for $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
 893       $AtomType1 = $This->{AtomTypesToUse}[$Index1];
 894       INDEX2: for $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
 895         $AtomType2 = $This->{AtomTypesToUse}[$Index2];
 896 
 897         # Atom pair count...
 898         $Value = $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2};
 899         if ($RoundOffValues) {
 900           $Value = MathUtil::round($Value, $This->{ValuesPrecision}) + 0;
 901         }
 902 
 903         # Ignore or not to ignore...
 904         if ($UseArbitrarySetSize && $Value == 0) {
 905           next INDEX2;
 906         }
 907 
 908         push @{$This->{AtomPairsIDs}}, "${AtomType1}-D${Distance}-${AtomType2}";
 909         push @Values, $Value;
 910       }
 911     }
 912   }
 913 
 914   # Add AtomPairsIDs and count values to fingerprint vector...
 915   $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomPairsIDs}});
 916   $This->{FingerprintsVector}->AddValues(\@Values);
 917 
 918   return $This;
 919 }
 920 
 921 # Get pharmacophore atom pair IDs corresponding to atom pairs count values in
 922 # fingerprint vector as an array or reference to an array...
 923 #
 924 # AtomPairIDs list  is generated during finalization  of fingerprints  and the fingerprint
 925 # vector containing count values matches the atom pairs array.
 926 #
 927 #
 928 sub GetAtomPairIDs {
 929   my($This) = @_;
 930 
 931   return wantarray ? @{$This->{AtomPairsIDs}} : \@{$This->{AtomPairsIDs}};
 932 }
 933 
 934 # Cache  appropriate molecule data...
 935 #
 936 sub _SetupMoleculeDataCache {
 937   my($This) = @_;
 938 
 939   # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
 940   # usage of distance matrix. The hydrogen atoms are ignored during processing...
 941   #
 942   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 943 
 944   # Get all atom IDs...
 945   my(@AtomIDs);
 946   @AtomIDs = ();
 947   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 948 
 949   # Set AtomIndex to AtomID hash...
 950   %{$This->{AtomIndexToID}} = ();
 951   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 952 
 953   return $This;
 954 }
 955 
 956 # Clear cached molecule data...
 957 #
 958 sub _ClearMoleculeDataCache {
 959   my($This) = @_;
 960 
 961   @{$This->{Atoms}} = ();
 962 
 963   return $This;
 964 }
 965 
 966 
 967 # Return a string containg data for TopologicalPharmacophoreAtomPairsFingerprints object...
 968 sub StringifyTopologicalPharmacophoreAtomPairsFingerprints {
 969   my($This) = @_;
 970   my($FingerprintsString);
 971 
 972   # Type of fingerprint...
 973   $FingerprintsString = "Fingerprint type: $This->{Type}; AtomPairsSetSizeToUse: $This->{AtomPairsSetSizeToUse}";
 974 
 975   # Min and max distance...
 976   $FingerprintsString .= "; MinDistance:  $This->{MinDistance}; MaxDistance: $This->{MaxDistance}";
 977 
 978   # Pharmacophore type labels and description...
 979   my($AtomType, @AtomTypes, @AtomTypesOrder, %AvailableAtomTypes);
 980 
 981   @AtomTypesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 982   %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 983 
 984   @AtomTypes = ();
 985   for $AtomType (@AtomTypesOrder) {
 986     push @AtomTypes, "$AtomType: $AvailableAtomTypes{$AtomType}";
 987   }
 988 
 989   $FingerprintsString .= "; AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">";
 990   $FingerprintsString .= "; AtomTypesOrder: <" . TextUtil::JoinWords(\@AtomTypesOrder, ", ", 0) . ">";
 991   $FingerprintsString .= "; AvailableAtomTypes: <" . TextUtil::JoinWords(\@AtomTypes, ", ", 0) . ">";
 992 
 993   # Normalization method...
 994   $FingerprintsString .= "; NormalizationMethodology: $This->{NormalizationMethodology}";
 995 
 996   # Weights...
 997   my($FirstLabel, $Label, $Weight);
 998 
 999   $FingerprintsString .= "; AtomTypesWeight <Labels: Weight>: <";
1000   $FirstLabel = 1;
1001   for $Label (sort @{$This->{AtomTypesToUse}}) {
1002     $Weight = $This->{AtomTypesWeight}{$Label};
1003     if ($FirstLabel) {
1004       $FirstLabel = 0;
1005       $FingerprintsString .= " ${Label}: ${Weight}";
1006     }
1007     else {
1008       $FingerprintsString .= "; ${Label}: ${Weight}";
1009     }
1010   }
1011   $FingerprintsString .= ">";
1012 
1013   # Fuzzification of count...
1014   my($FuzzifyFlag);
1015   $FuzzifyFlag = $This->{FuzzifyAtomPairsCount} ? "Yes" : "No";
1016   $FingerprintsString .= "; FuzzifyAtomPairsCount: $FuzzifyFlag; FuzzificationMode: $This->{FuzzificationMode}; FuzzificationMethodology: $This->{FuzzificationMethodology}; FuzzFactor: $This->{FuzzFactor}";
1017 
1018   # Total number of pharmacophore atom pairs...
1019   $FingerprintsString .= "; NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();
1020 
1021   # FingerprintsVector...
1022   $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
1023 
1024   return $FingerprintsString;
1025 }
1026