MayaChemTools

   1 package Fingerprints::FingerprintsStringUtil;
   2 #
   3 # File: FingerprintsStringUtil.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2025 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
   16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Exporter;
  28 use Carp;
  29 use TextUtil ();
  30 use Fingerprints::FingerprintsBitVector;
  31 use Fingerprints::FingerprintsVector;
  32 
  33 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  34 
  35 @ISA = qw(Exporter);
  36 @EXPORT = qw();
  37 @EXPORT_OK = qw(AreFingerprintsStringValuesValid GenerateFingerprintsString GenerateFingerprintsBitVectorString GenerateFingerprintsVectorString GetFingerprintsStringTypeAndDescription GetDefaultBitsOrder GetDefaultBitStringFormat GetDefaultVectorStringFormat GetFingeprintsStringDelimiter GetFingerprintsStringValues ParseFingerprintsString ParseFingerprintsBitVectorString ParseFingerprintsVectorString);
  38 
  39 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  40 
  41 # Fingerprint string values delimiter...
  42 my($FPStringDelim) = ';';
  43 
  44 # Generate fingerprints string...
  45 #
  46 sub GenerateFingerprintsString {
  47   my($FingerprintsObject) = @_;
  48   my($VectorType);
  49 
  50   $VectorType = $FingerprintsObject->GetVectorType();
  51 
  52   VECTORTYPE : {
  53     if ($VectorType =~ /^FingerprintsBitVector$/i) { return GenerateFingerprintsBitVectorString(@_); last VECTORTYPE; }
  54     if ($VectorType =~ /^FingerprintsVector$/i) { return GenerateFingerprintsVectorString(@_); last VECTORTYPE; }
  55     croak "Error: FingerprintsStringUtil::GenerateFingerprintsString: Fingerprints object vector type, $VectorType, is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
  56   }
  57   return '';
  58 }
  59 
  60 # Generate fingerprints bit vector string...
  61 #
  62 sub GenerateFingerprintsBitVectorString {
  63   my($FingerprintsObject, $BitStringFormat, $BitsOrder) = @_;
  64   my($FingerprintsString, $FingerprintsBitVector, @FingerprintsStringValues);
  65 
  66   if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  67   if (!$BitsOrder) {$BitsOrder = GetDefaultBitsOrder(); }
  68 
  69   $FingerprintsString = '';
  70   $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::IsFingerprintsBitVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsBitVector();
  71 
  72   # Use specified size instead of size: it corresponds to actual size of the fingerprints bit vector;
  73   # size reflects actual internal size including any padding.
  74   #
  75 
  76   @FingerprintsStringValues = ();
  77   push @FingerprintsStringValues, ($FingerprintsObject->GetVectorType(), _GetFingerprintsDescription($FingerprintsObject), $FingerprintsBitVector->GetSpecifiedSize(), $BitStringFormat, $BitsOrder);
  78 
  79   $FingerprintsString = join("${FPStringDelim}",  @FingerprintsStringValues) . "${FPStringDelim}" . _GetFingerprintBitVectorString($FingerprintsBitVector, $BitStringFormat, $BitsOrder);
  80 
  81   return $FingerprintsString;
  82 }
  83 
  84 # Get fingerprint bit vector string...
  85 #
  86 sub _GetFingerprintBitVectorString {
  87   my($FingerprintsBitVector, $BitStringFormat, $BitsOrder) = @_;
  88   my($FingerprintBitString);
  89 
  90   if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  91   if (!$BitsOrder) {$BitsOrder = GetDefaultBitsOrder(); }
  92 
  93   $FingerprintBitString = '';
  94   if (!$FingerprintsBitVector) {return $FingerprintBitString;}
  95 
  96   BITSTRINGFORMAT : {
  97     if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) { return $FingerprintsBitVector->GetBitsAsBinaryString($BitsOrder); last BITSTRINGFORMAT; }
  98     if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) { return $FingerprintsBitVector->GetBitsAsHexadecimalString($BitsOrder); last BITSTRINGFORMAT; }
  99     croak "Error: FingerprintsStringUtil::_GetFingerprintBitsAsString: Specified bit vector string format, $BitStringFormat, is not supported. Value values: Binary, Bin, BinaryString, Hexdecimal, Hex, HexadecimalString...";
 100   }
 101   return $FingerprintBitString;
 102 }
 103 
 104 # Generate fingerprints vector string...
 105 #
 106 sub GenerateFingerprintsVectorString {
 107   my($FingerprintsObject, $VectorStringFormat) = @_;
 108   my($FingerprintsString, $FingerprintsVector, @FingerprintsStringValues);
 109 
 110   $FingerprintsString = '';
 111   $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();
 112 
 113   if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }
 114 
 115   @FingerprintsStringValues = ();
 116   push @FingerprintsStringValues, ($FingerprintsObject->GetVectorType(), _GetFingerprintsDescription($FingerprintsObject), $FingerprintsVector->GetNumOfValues(), $FingerprintsVector->GetType(), $VectorStringFormat);
 117 
 118   $FingerprintsString = join("${FPStringDelim}",  @FingerprintsStringValues) . "${FPStringDelim}" . _GetFingerprintVectorString($FingerprintsVector, $VectorStringFormat);
 119 
 120   return $FingerprintsString;
 121 }
 122 
 123 # Get fingerprint vector string...
 124 #
 125 sub _GetFingerprintVectorString {
 126   my($FingerprintsVector, $VectorStringFormat) = @_;
 127   my($FingerprintString);
 128 
 129   if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector);}
 130 
 131   $FingerprintString = '';
 132   if (!$FingerprintsVector) {return $FingerprintString;}
 133 
 134   VECTORSTRINGFORMAT : {
 135     if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) { return $FingerprintsVector->GetIDsAndValuesString(); last VECTORSTRINGFORMAT; }
 136     if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) { return $FingerprintsVector->GetIDsAndValuesPairsString(); last VECTORSTRINGFORMAT; }
 137     if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) { return $FingerprintsVector->GetValuesAndIDsString(); last VECTORSTRINGFORMAT; }
 138     if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) { return $FingerprintsVector->GetValuesAndIDsPairsString(); last VECTORSTRINGFORMAT; }
 139     if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) { return $FingerprintsVector->GetValuesString(); last VECTORSTRINGFORMAT; }
 140     croak "Error: FingerprintsStringUtil::_GetFingerprintVectorString: Specified vector string format, $VectorStringFormat, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
 141   }
 142   return $FingerprintString;
 143 }
 144 
 145 # Get fingerprints string type and description...
 146 sub GetFingerprintsStringTypeAndDescription {
 147   my($FingerprintsString) = @_;
 148   my($Type, $Description);
 149 
 150   ($Type, $Description) = _ParseFingerprintsStringValues($FingerprintsString);
 151 
 152   return ($Type, $Description);
 153 }
 154 
 155 # Get all fingerprints string values...
 156 sub GetFingerprintsStringValues {
 157   my($FingerprintsString) = @_;
 158 
 159   return _ParseFingerprintsStringValues($FingerprintsString);
 160 }
 161 
 162 # Parse fingerprints string and return FingerprintsBitVector or FingerprintsVector object...
 163 #
 164 sub ParseFingerprintsString {
 165   my($FingerprintsString) = @_;
 166 
 167   VECTORTYPE : {
 168     if ($FingerprintsString =~ /^FingerprintsBitVector/i) { return ParseFingerprintsBitVectorString(@_); last VECTORTYPE; }
 169     if ($FingerprintsString =~ /^FingerprintsVector/i) { return ParseFingerprintsVectorString(@_); last VECTORTYPE; }
 170     croak "Error: FingerprintsStringUtil::ParseFingerprintsString: Fingerprints string vector type is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
 171   }
 172   return undef;
 173 }
 174 
 175 # Parse fingerprints bit vector string and retrun bit vector...
 176 #
 177 sub ParseFingerprintsBitVectorString {
 178   my($FingerprintsString, $ValidateValues) = @_;
 179   my($ErrorMsgPrefix, $VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
 180 
 181   $ErrorMsgPrefix = "Error: ParsePathLengthFingerprintsBitVectorString";
 182   ($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = _ParseFingerprintsStringValues($FingerprintsString);
 183   if ($ValidateValues) {
 184     _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
 185   }
 186 
 187   return _GenerateFingerprintBitVector($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
 188 }
 189 
 190 # Generate fingerints bit vector...
 191 #
 192 sub _GenerateFingerprintBitVector {
 193   my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = @_;
 194   my($FingerprintsBitVector);
 195 
 196   $FingerprintsBitVector = undef;
 197 
 198   BITSTRINGFORMAT : {
 199     if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
 200       $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString($BitVectorString, $BitsOrder);
 201       last BITSTRINGFORMAT;
 202     }
 203     if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
 204       $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromHexadecimalString($BitVectorString, $BitsOrder);
 205       last BITSTRINGFORMAT;
 206     }
 207     croak "Error: FingerprintsStringUtil::_GenerateFingerprintBitVector: Specified bit vector string format, $BitStringFormat, is not supported. Value values: Binary, Bin, BinaryString, Hexdecimal, Hex, HexadecimalString...";
 208   }
 209 
 210   if (defined $FingerprintsBitVector) {
 211     # Set fingerints vector type and description...
 212     $FingerprintsBitVector->SetVectorType($VectorType);
 213     $FingerprintsBitVector->SetDescription($Description);
 214 
 215     # Set specified size which might be different from the bit string size due to padding
 216     # used by Perl vec function to handle bit vectors in BitVectot class...
 217     #
 218     $FingerprintsBitVector->SetSpecifiedSize($Size);
 219   }
 220 
 221   return $FingerprintsBitVector;
 222 }
 223 
 224 # Parse fingerprints vector string and retrun vector...
 225 #
 226 sub ParseFingerprintsVectorString {
 227   my($FingerprintsString, $ValidateValues) = @_;
 228   my($ErrorMsgPrefix, $VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);
 229 
 230   $ErrorMsgPrefix = "Error: ParseFingerprintsVectorString";
 231   ($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = _ParseFingerprintsStringValues($FingerprintsString);
 232 
 233   # No need to check $VectorString1 and $VectorString2 values as they would be
 234   # checked later during the creation of FingerprintsVector...
 235   #
 236   if ($ValidateValues) {
 237     _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $NumOfValues, $VectorValuesType, $VectorStringFormat);
 238   }
 239 
 240   return _GenerateFingerprintVector($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);
 241 }
 242 
 243 # Generate fingerints vector...
 244 #
 245 sub _GenerateFingerprintVector {
 246   my($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = @_;
 247   my($FingerprintsVector, $VectorString);
 248 
 249   $VectorString = TextUtil::IsEmpty($VectorString2) ? $VectorString1 : "${VectorString1};${VectorString2}";
 250   $FingerprintsVector = undef;
 251 
 252   VECTORSTRINGFORMAT : {
 253     if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
 254       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesString($VectorValuesType, $VectorString);
 255       last VECTORSTRINGFORMAT;
 256     }
 257     if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
 258       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesString($VectorValuesType, $VectorString);
 259       last VECTORSTRINGFORMAT;
 260     }
 261     if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
 262       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString($VectorValuesType, $VectorString);
 263       last VECTORSTRINGFORMAT;
 264     }
 265     if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
 266       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsString($VectorValuesType, $VectorString);
 267       last VECTORSTRINGFORMAT;
 268     }
 269     if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
 270       $FingerprintsVector = Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString($VectorValuesType, $VectorString);
 271       last VECTORSTRINGFORMAT;
 272     }
 273     croak "Error: FingerprintsStringUtil::_GenerateFingerprintVector: Specified vector string format, $VectorStringFormat, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
 274   }
 275 
 276   if (defined $FingerprintsVector) {
 277     # Set fingerints vector type and description...
 278     $FingerprintsVector->SetVectorType($VectorType);
 279     $FingerprintsVector->SetDescription($Description);
 280   }
 281 
 282   return $FingerprintsVector;
 283 }
 284 
 285 # Validate fingerint string values...
 286 #
 287 sub AreFingerprintsStringValuesValid {
 288   my($FingerprintsString) = @_;
 289   my($Value);
 290 
 291   for $Value (_ParseFingerprintsStringValues($FingerprintsString)) {
 292     if (TextUtil::IsEmpty($Value)) {
 293       return 0;
 294     }
 295   }
 296   return 1;
 297 }
 298 
 299 # Get fingerprints description...
 300 #
 301 sub _GetFingerprintsDescription {
 302   my($FingerprintsObject) = @_;
 303   my($Description);
 304 
 305   $Description = $FingerprintsObject->GetDescription();
 306 
 307   return TextUtil::IsEmpty($Description) ? 'No description available for fingerprints' : $Description;
 308 }
 309 
 310 # Parse fingerprints string values...
 311 #
 312 sub _ParseFingerprintsStringValues {
 313   my($FingerprintsString) = @_;
 314 
 315   return split "${FPStringDelim}", $FingerprintsString;
 316 }
 317 
 318 # Check to make sure already parsed fingerprints string values are valid....
 319 #
 320 sub _ValidateFingerprintsStringValues {
 321   my($ErrorMsgPrefix, @Values) = @_;
 322   my($Value);
 323 
 324   for $Value (@Values) {
 325     if (TextUtil::IsEmpty($Value)) {
 326       croak("${ErrorMsgPrefix}: _ValidateFingerprintsStringValues: Fingerprints string format is not valid: An empty value found...");
 327     }
 328   }
 329 }
 330 
 331 # Default bit string format...
 332 #
 333 sub GetDefaultBitStringFormat {
 334   return 'HexadecimalString';
 335 }
 336 
 337 # Default bit order...
 338 #
 339 sub GetDefaultBitsOrder {
 340   return 'Ascending';
 341 }
 342 
 343 # Default vector string format using fingerprints or fingerprints vector object...
 344 #
 345 sub GetDefaultVectorStringFormat {
 346   my($FingerprintsObject) = @_;
 347   my($FingerprintsVector);
 348 
 349   $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();
 350 
 351   return _GetDefaultVectorStringFormat($FingerprintsVector);
 352 }
 353 
 354 # Default vector string format using fingerprits vector object...
 355 #
 356 sub _GetDefaultVectorStringFormat {
 357   my($FingerprintsVector) = @_;
 358   my($Type);
 359 
 360   $Type = $FingerprintsVector->GetType();
 361 
 362   return ($Type =~ /^NumericalValues$/i) ? 'IDsAndValuesString' : 'ValuesString';
 363 }
 364 
 365 # Fingerprints string delimiter...
 366 #
 367 sub GetFingeprintsStringDelimiter {
 368   return $FPStringDelim;
 369 }
 370