package FileIO::SDFileIO;
#
# File: SDFileIO.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability or fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use SDFileUtil ();
use FileIO::FileIO;
use FileIO::MDLMolFileIO;
use Molecule;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Takes a list of property name and value pairs. A defined Name property
# referring to a SD file is required; the constructor croaks otherwise (see
# _InitializeSDFileIOProperties). Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object: the base class builds it and it is then reblessed into
  # the invoking class...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeSDFileIO();

  $This->_InitializeSDFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize any local object data...
#
# Returns the object.
#
sub _InitializeSDFileIO {
  my($This) = @_;

  # Sorting of MDL data fields during output: Keep the initial order or write 'em out alphabetically...
  $This->{SortDataFieldsDuringOutput} = 'No';

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;

}

# Initialize object values...
#
# Validates the required Name property and then applies each specified name
# and value pair by calling the corresponding Set<Name> method on the object.
# Croaks when Name is missing or undefined, or when it doesn't pass IsSDFile.
# Returns the object.
#
sub _InitializeSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Validate the file name before calling any setter: an invalid request then
  # always fails with the same message, no matter in which order the hash
  # hands out its keys, and no setter runs for an object that is about to be
  # rejected...
  if (!defined $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a SD file...
  my($FileName);
  $FileName = $NamesAndValues{Name};
  if (!$This->IsSDFile($FileName)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be SDF format...";
  }

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.

  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  return $This;
}

# Is it a SD file?
#
# Supports two invocation methods: an object method, $Object->IsSDFile($FileName),
# or a package function, IsSDFile($FileName). Returns the status reported by
# FileUtil::CheckFileType for the "sd sdf" file types.
#
sub IsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;
  my($FileName, $Status);

  # The file name is the second argument only for a call on a SDFileIO object...
  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    $FileName = $SecondParameter;
  }
  else {
    $FileName = $FirstParameter;
  }

  # Check file extension...
  $Status = FileUtil::CheckFileType($FileName, "sd sdf");

  return $Status;
}

# Read molecule from file and return molecule object...
#
# Retrieves the next compound string from the object's file handle and
# returns whatever ParseMoleculeString generates for it.
#
sub ReadMolecule {
  my($This) = @_;
  my($FileHandle, $MoleculeString);

  $FileHandle = $This->GetFileHandle();

  # Retrieve the compound string in scalar context. ParseMoleculeString tells
  # method calls from function calls by its argument count, so a reader
  # returning an empty list inside the argument list would leave it with the
  # object as its only argument and get the object parsed as molecule data.
  # NOTE(review): assumes ReadCmpdString returns a single string; confirm.
  $MoleculeString = SDFileUtil::ReadCmpdString($FileHandle);

  return $This->ParseMoleculeString($MoleculeString);
}

# Write compound data along with any data field label and values using Molecule object...
sub WriteMolecule {
  my($This, $Molecule) = @_;

  # Writes the string generated for the specified molecule object, followed by
  # a new line, to the object's file handle. Nothing is written and a warning
  # is issued when no molecule object is specified. Returns the object.

  # Check for a blessed reference first: calling IsMolecule on a plain string
  # or an unblessed reference would die instead of issuing the warning...
  if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }
  my($FileHandle);
  $FileHandle = $This->GetFileHandle();

  print {$FileHandle} $This->GenerateMoleculeString($Molecule) . "\n";

  return $This;
}

# Retrieve molecule string...
#
# Returns the result of SDFileUtil::ReadCmpdString for the object's file
# handle without any parsing.
#
sub ReadMoleculeString {
  my($This) = @_;
  my($FileHandle);

  $FileHandle = $This->GetFileHandle();
  return SDFileUtil::ReadCmpdString($FileHandle);
}

# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class
# method or a package function.
#
# Returns undef for an empty molecule string or when no molecule object is
# generated from the molecule data. Otherwise, the molecule object is returned
# with the SD data field labels and the label/value pairs attached to it.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $MoleculeString);

  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    ($This, $MoleculeString) = ($FirstParameter, $SecondParameter);
  }
  else {
    $MoleculeString = $FirstParameter;
    $This = undef;
  }
  if (!$MoleculeString) {
    return undef;
  }

  # Parse molecule data...
  my($Molecule);
  $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

  # Report a failed parse the same way as an empty string instead of dying on
  # the method calls below...
  # NOTE(review): assumes the MDL parser signals failure by returning undef; confirm.
  if (!defined($Molecule)) {
    return undef;
  }

  # Process data label/value pairs...
  my(@MoleculeLines, @DataLabels, %DataLabelsAndValues);

  @MoleculeLines = split /\n/, $MoleculeString;
  @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
  %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

  # Store reference to data labels to keep track of their initial order in SD file...
  $Molecule->SetDataFieldLabels(\@DataLabels);

  # Store reference to SD data label/value pairs hash as a generic property of molecule...
  $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

  return $Molecule;
}

# Generate molecule string using molecule object...
#
# Supports the same two invocation methods as ParseMoleculeString. Returns
# undef for an undefined molecule. Otherwise, returns the CTAB data, a new
# line, any data field label and value lines, another new line, and the $$$$
# record delimiter. Data fields are sorted only for a call on an object whose
# SortDataFieldsDuringOutput value is Yes.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $Molecule);

  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    ($This, $Molecule) = ($FirstParameter, $SecondParameter);
  }
  else {
    $Molecule = $FirstParameter;
    $This = undef;
  }
  if (!defined($Molecule)) {
    return undef;
  }

  # Generate CTAB data...
  my($CmpdString);
  $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

  # Generate any data field labels and values...
  my($DataFieldLabelsAndValuesString);

  $DataFieldLabelsAndValuesString = '';
  if ($Molecule->HasProperty('DataFieldLabels')) {
    my($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);

    # There is no object for a package function call: test for it explicitly
    # instead of relying on autovivification of an undefined value...
    $SortDataFields = (defined($This) && exists($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0;

    $DataFieldLabelsRef = $Molecule->GetDataFieldLabels();
    $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
    $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
  }

  return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
}


# Is it a SDFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or of
# a class derived from it; otherwise, returns 0.
#
sub _IsSDFileIO {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}