package Fingerprints::FingerprintsStringUtil;
#
# File: FingerprintsStringUtil.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Exporter;
use Carp;
use TextUtil ();
use Fingerprints::FingerprintsBitVector;
use Fingerprints::FingerprintsVector;

# Package variables used by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);

# Nothing is exported by default; every public function is available on request
# or all together through the ":all" tag...
@EXPORT = qw();
@EXPORT_OK = qw(
  AreFingerprintsStringValuesValid
  GenerateFingerprintsString
  GenerateFingerprintsBitVectorString
  GenerateFingerprintsVectorString
  GetFingerprintsStringTypeAndDescription
  GetDefaultBitsOrder
  GetDefaultBitStringFormat
  GetDefaultVectorStringFormat
  GetFingeprintsStringDelimiter
  GetFingerprintsStringValues
  ParseFingerprintsString
  ParseFingerprintsBitVectorString
  ParseFingerprintsVectorString
);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Fingerprint string values delimiter...
my $FPStringDelim = ';';

# Generate fingerprints string...
#
# Asks the fingerprints object for its vector type and hands all arguments over,
# unchanged, to GenerateFingerprintsBitVectorString or GenerateFingerprintsVectorString.
# Any other vector type is a fatal error.
#
sub GenerateFingerprintsString {
  my($FingerprintsObject) = @_;

  my $VectorType = $FingerprintsObject->GetVectorType();

  if ($VectorType =~ /^FingerprintsBitVector$/i) {
    return GenerateFingerprintsBitVectorString(@_);
  }
  elsif ($VectorType =~ /^FingerprintsVector$/i) {
    return GenerateFingerprintsVectorString(@_);
  }

  croak "Error: FingerprintsStringUtil::GenerateFingerprintsString: Fingerprints object vector type, $VectorType, is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
}

# Generate fingerprints bit vector string...
#
# The generated string consists of the following delimited values, in order:
# vector type, description, specified size, bit string format, bits order and
# the bit string itself. Bit string format and bits order fall back to the
# module defaults when not specified.
#
sub GenerateFingerprintsBitVectorString {
  my($FingerprintsObject, $BitStringFormat, $BitsOrder) = @_;

  $BitStringFormat ||= GetDefaultBitStringFormat();
  $BitsOrder ||= GetDefaultBitsOrder();

  # The object passed in is either a bit vector itself or something holding one...
  my $BitVector = Fingerprints::FingerprintsBitVector::IsFingerprintsBitVector($FingerprintsObject)
    ? $FingerprintsObject
    : $FingerprintsObject->GetFingerprintsBitVector();

  # Use specified size instead of size: it corresponds to actual size of the fingerprints bit vector;
  # size reflects actual internal size including any padding.
  #
  my @HeaderValues = (
    $FingerprintsObject->GetVectorType(),
    _GetFingerprintsDescription($FingerprintsObject),
    $BitVector->GetSpecifiedSize(),
    $BitStringFormat,
    $BitsOrder,
  );

  my $BitString = _GetFingerprintBitVectorString($BitVector, $BitStringFormat, $BitsOrder);

  return join($FPStringDelim, @HeaderValues, $BitString);
}

# Get fingerprint bit vector string...
#
# Arguments: bit vector object, bit string format and bits order. Format and order
# fall back to the module defaults when not specified.
#
# Returns the bit string produced by the bit vector object for the requested format,
# or an empty string when no bit vector is supplied. An unsupported format is a
# fatal error.
#
sub _GetFingerprintBitVectorString {
  my($FingerprintsBitVector, $BitStringFormat, $BitsOrder) = @_;

  # Nothing to format without a bit vector...
  if (!$FingerprintsBitVector) { return ''; }

  if (!$BitStringFormat) { $BitStringFormat = GetDefaultBitStringFormat(); }
  if (!$BitsOrder) { $BitsOrder = GetDefaultBitsOrder(); }

  if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
    return $FingerprintsBitVector->GetBitsAsBinaryString($BitsOrder);
  }
  if ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
    return $FingerprintsBitVector->GetBitsAsHexadecimalString($BitsOrder);
  }

  croak "Error: FingerprintsStringUtil::_GetFingerprintBitVectorString: Specified bit vector string format, $BitStringFormat, is not supported. Valid values: Binary, Bin, BinaryString, Hexadecimal, Hex, HexadecimalString...";
}

# Generate fingerprints vector string...
#
# The generated string consists of the following delimited values, in order:
# vector type, description, number of values, vector values type, vector string
# format and the vector string itself. The vector string format falls back to the
# default for the vector when not specified.
#
sub GenerateFingerprintsVectorString {
  my($FingerprintsObject, $VectorStringFormat) = @_;
  my($FingerprintsString, $FingerprintsVector, @FingerprintsStringValues);

  # The object passed in is either a vector itself or something holding one...
  $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();

  if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }

  @FingerprintsStringValues = (
    $FingerprintsObject->GetVectorType(),
    _GetFingerprintsDescription($FingerprintsObject),
    $FingerprintsVector->GetNumOfValues(),
    $FingerprintsVector->GetType(),
    $VectorStringFormat,
  );

  $FingerprintsString = join("${FPStringDelim}", @FingerprintsStringValues) . "${FPStringDelim}" . _GetFingerprintVectorString($FingerprintsVector, $VectorStringFormat);

  return $FingerprintsString;
}

# Get fingerprint vector string...
#
# Arguments: vector object and vector string format. The format falls back to the
# default for the vector when not specified.
#
# Returns the string produced by the vector object for the requested format, or an
# empty string when no vector is supplied. An unsupported format is a fatal error.
#
sub _GetFingerprintVectorString {
  my($FingerprintsVector, $VectorStringFormat) = @_;

  # Check for a missing vector before anything else: the default format lookup
  # calls a method on the vector and would fail on an undefined value...
  if (!$FingerprintsVector) { return ''; }

  if (!$VectorStringFormat) { $VectorStringFormat = _GetDefaultVectorStringFormat($FingerprintsVector); }

  if ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
    return $FingerprintsVector->GetIDsAndValuesString();
  }
  if ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
    return $FingerprintsVector->GetIDsAndValuesPairsString();
  }
  if ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
    return $FingerprintsVector->GetValuesAndIDsString();
  }
  if ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
    return $FingerprintsVector->GetValuesAndIDsPairsString();
  }
  if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
    return $FingerprintsVector->GetValuesString();
  }

  croak "Error: FingerprintsStringUtil::_GetFingerprintVectorString: Specified vector string format, $VectorStringFormat, is not supported. Valid values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
}

# Get fingerprints string type and description...
#
# Returns the first two delimited values of the fingerprints string as a
# (type, description) list.
#
sub GetFingerprintsStringTypeAndDescription {
  my($FingerprintsString) = @_;
  my($Type, $Description);

  ($Type, $Description) = _ParseFingerprintsStringValues($FingerprintsString);

  return ($Type, $Description);
}

# Get all fingerprints string values...
#
# Returns the list of delimited values found in the fingerprints string.
#
sub GetFingerprintsStringValues {
  my($FingerprintsString) = @_;

  return _ParseFingerprintsStringValues($FingerprintsString);
}

# Parse fingerprints string and return FingerprintsBitVector or FingerprintsVector object...
#
# The parser is chosen from the vector type at the start of the string and all
# arguments are forwarded to it unchanged. Any other leading text is a fatal error.
#
sub ParseFingerprintsString {
  my($FingerprintsString) = @_;

  if ($FingerprintsString =~ /^FingerprintsBitVector/i) {
    return ParseFingerprintsBitVectorString(@_);
  }
  if ($FingerprintsString =~ /^FingerprintsVector/i) {
    return ParseFingerprintsVectorString(@_);
  }

  croak "Error: FingerprintsStringUtil::ParseFingerprintsString: Fingerprints string vector type is not supported. Valid values: FingerprintsBitVector or FingerprintsVector...";
}

# Parse fingerprints bit vector string and return bit vector...
#
# Arguments: fingerprints string and an optional flag. When the flag is true, the
# parsed values, except for the description, are checked for emptiness before the
# bit vector is created.
#
sub ParseFingerprintsBitVectorString {
  my($FingerprintsString, $ValidateValues) = @_;
  my($ErrorMsgPrefix, $VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);

  # Prefix used in validation error messages; it names this function...
  $ErrorMsgPrefix = "Error: ParseFingerprintsBitVectorString";

  ($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = _ParseFingerprintsStringValues($FingerprintsString);
  if ($ValidateValues) {
    _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
  }

  return _GenerateFingerprintBitVector($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString);
}

# Generate fingerprints bit vector...
#
# Arguments: vector type, description, size, bit string format, bits order and
# bit string; these are the values parsed out of a fingerprints bit vector string.
#
# Creates a bit vector object from the bit string using the constructor that
# matches the bit string format, then sets its vector type, description and
# specified size. An unsupported format is a fatal error.
#
sub _GenerateFingerprintBitVector {
  my($VectorType, $Description, $Size, $BitStringFormat, $BitsOrder, $BitVectorString) = @_;
  my($FingerprintsBitVector);

  if ($BitStringFormat =~ /^(BinaryString|Binary|Bin)$/i) {
    $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromBinaryString($BitVectorString, $BitsOrder);
  }
  elsif ($BitStringFormat =~ /^(HexadecimalString|Hexadecimal|Hex)$/i) {
    $FingerprintsBitVector = Fingerprints::FingerprintsBitVector::NewFromHexadecimalString($BitVectorString, $BitsOrder);
  }
  else {
    croak "Error: FingerprintsStringUtil::_GenerateFingerprintBitVector: Specified bit vector string format, $BitStringFormat, is not supported. Valid values: Binary, Bin, BinaryString, Hexadecimal, Hex, HexadecimalString...";
  }

  if (defined $FingerprintsBitVector) {
    # Set fingerprints vector type and description...
    $FingerprintsBitVector->SetVectorType($VectorType);
    $FingerprintsBitVector->SetDescription($Description);

    # Set specified size which might be different from the bit string size due to padding
    # used by Perl vec function to handle bit vectors in BitVector class...
    #
    $FingerprintsBitVector->SetSpecifiedSize($Size);
  }

  return $FingerprintsBitVector;
}

# Parse fingerprints vector string and return vector...
#
# Arguments: fingerprints string and an optional flag. When the flag is true, the
# vector type, number of values, vector values type and vector string format are
# checked for emptiness before the vector is created.
#
sub ParseFingerprintsVectorString {
  my($FingerprintsString, $ValidateValues) = @_;
  my($ErrorMsgPrefix, $VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);

  $ErrorMsgPrefix = "Error: ParseFingerprintsVectorString";
  ($VectorType, $Description, $NumOfValues, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = _ParseFingerprintsStringValues($FingerprintsString);

  # No need to check $VectorString1 and $VectorString2 values as they would be
  # checked later during the creation of FingerprintsVector...
  #
  if ($ValidateValues) {
    _ValidateFingerprintsStringValues($ErrorMsgPrefix, $VectorType, $NumOfValues, $VectorValuesType, $VectorStringFormat);
  }

  return _GenerateFingerprintVector($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2);
}

# Generate fingerprints vector...
#
# Arguments: vector type, description, vector values type, vector string format
# and up to two vector string parts; these are the values parsed out of a
# fingerprints vector string.
#
# Creates a vector object using the constructor that matches the vector string
# format, then sets its vector type and description. An unsupported format is a
# fatal error.
#
sub _GenerateFingerprintVector {
  my($VectorType, $Description, $VectorValuesType, $VectorStringFormat, $VectorString1, $VectorString2) = @_;
  my($FingerprintsVector, $VectorString, $NewFromString);

  # Pick the constructor first so that an unsupported format is reported before
  # any other work is done...
  if ($VectorStringFormat =~ /^(ValuesString|Values)$/i) {
    $NewFromString = \&Fingerprints::FingerprintsVector::NewFromValuesString;
  }
  elsif ($VectorStringFormat =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
    $NewFromString = \&Fingerprints::FingerprintsVector::NewFromIDsAndValuesString;
  }
  elsif ($VectorStringFormat =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
    $NewFromString = \&Fingerprints::FingerprintsVector::NewFromIDsAndValuesPairsString;
  }
  elsif ($VectorStringFormat =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
    $NewFromString = \&Fingerprints::FingerprintsVector::NewFromValuesAndIDsString;
  }
  elsif ($VectorStringFormat =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
    $NewFromString = \&Fingerprints::FingerprintsVector::NewFromValuesAndIDsPairsString;
  }
  else {
    croak "Error: FingerprintsStringUtil::_GenerateFingerprintVector: Specified vector string format, $VectorStringFormat, is not supported. Valid values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValuesString, Values...";
  }

  # A vector string made up of two parts was split apart on the fingerprints string
  # delimiter during parsing; put it back together using the same delimiter...
  $VectorString = TextUtil::IsEmpty($VectorString2) ? $VectorString1 : "${VectorString1}${FPStringDelim}${VectorString2}";

  $FingerprintsVector = $NewFromString->($VectorValuesType, $VectorString);

  if (defined $FingerprintsVector) {
    # Set fingerprints vector type and description...
    $FingerprintsVector->SetVectorType($VectorType);
    $FingerprintsVector->SetDescription($Description);
  }

  return $FingerprintsVector;
}

# Validate fingerprints string values...
#
# Returns 1 when the fingerprints string contains at least one delimited value and
# none of its values is empty; otherwise, returns 0.
#
sub AreFingerprintsStringValuesValid {
  my($FingerprintsString) = @_;
  my(@Values);

  # An undefined or empty string holds no values at all...
  if (!defined($FingerprintsString) || !length($FingerprintsString)) {
    return 0;
  }

  # A string made up of nothing but delimiters also parses to an empty list; without
  # this check the loop below would never run and the string would pass as valid...
  @Values = _ParseFingerprintsStringValues($FingerprintsString);
  if (!@Values) {
    return 0;
  }

  for my $Value (@Values) {
    if (TextUtil::IsEmpty($Value)) {
      return 0;
    }
  }
  return 1;
}

# Get fingerprints description...
#
# Returns the description reported by the fingerprints object, or a placeholder
# text when TextUtil::IsEmpty reports the description as empty.
#
sub _GetFingerprintsDescription {
  my($FingerprintsObject) = @_;
  my($Description);

  $Description = $FingerprintsObject->GetDescription();

  return TextUtil::IsEmpty($Description) ? 'No description available for fingerprints' : $Description;
}

# Parse fingerprints string values...
#
# Splits the fingerprints string on the fingerprints string delimiter and returns
# the resulting values. Perl's split drops trailing empty values.
#
sub _ParseFingerprintsStringValues {
  my($FingerprintsString) = @_;

  # Quote the delimiter so that it is always matched literally...
  return split /\Q$FPStringDelim\E/, $FingerprintsString;
}

# Check to make sure already parsed fingerprints string values are valid....
#
# Arguments: error message prefix followed by the values to check. Finding a value
# that TextUtil::IsEmpty reports as empty is a fatal error.
#
sub _ValidateFingerprintsStringValues {
  my($ErrorMsgPrefix, @Values) = @_;

  for my $Value (@Values) {
    if (TextUtil::IsEmpty($Value)) {
      croak("${ErrorMsgPrefix}: _ValidateFingerprintsStringValues: Fingerprints string format is not valid: An empty value found...");
    }
  }
}

# Default bit string format...
#
sub GetDefaultBitStringFormat {
  return 'HexadecimalString';
}

# Default bit order...
#
sub GetDefaultBitsOrder {
  return 'Ascending';
}

# Default vector string format using fingerprints or fingerprints vector object...
#
# The object passed in is either a vector itself or something holding one.
#
sub GetDefaultVectorStringFormat {
  my($FingerprintsObject) = @_;
  my($FingerprintsVector);

  $FingerprintsVector = Fingerprints::FingerprintsVector::IsFingerprintsVector($FingerprintsObject) ? $FingerprintsObject : $FingerprintsObject->GetFingerprintsVector();

  return _GetDefaultVectorStringFormat($FingerprintsVector);
}

# Default vector string format using fingerprints vector object...
#
# The default depends on the type reported by the vector: NumericalValues vectors
# use IDsAndValuesString; every other type uses ValuesString.
#
sub _GetDefaultVectorStringFormat {
  my($FingerprintsVector) = @_;

  if ($FingerprintsVector->GetType() =~ /^NumericalValues$/i) {
    return 'IDsAndValuesString';
  }

  return 'ValuesString';
}

# Fingerprints string delimiter...
#
# Returns the delimiter placed between the values of a fingerprints string.
#
sub GetFingeprintsStringDelimiter {
  return $FPStringDelim;
}