MayaChemTools

   1 package FileIO::SDFileIO;
   2 #
   3 # File: SDFileIO.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
   16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use TextUtil ();
  31 use FileUtil ();
  32 use SDFileUtil ();
  33 use FileIO::FileIO;
  34 use FileIO::MDLMolFileIO;
  35 use Molecule;
  36 
  37 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  38 
  39 @ISA = qw(FileIO::FileIO Exporter);
  40 @EXPORT = qw();
  41 @EXPORT_OK = qw(IsSDFile);
  42 
  43 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  44 
   45 # Setup class variables: $ClassName is a file-scoped lexical that _InitializeClass()
      # fills in with this package's name; the croak/carp messages and _IsSDFileIO() in
      # this file read it.
   46 my($ClassName);
   47 _InitializeClass();
  48 
  49 # Class constructor...
  50 sub new {
  51   my($Class, %NamesAndValues) = @_;
  52 
  53   # Initialize object...
  54   my $This = $Class->SUPER::new();
  55   bless $This, ref($Class) || $Class;
  56   $This->_InitializeSDFileIO();
  57 
  58   $This->_InitializeSDFileIOProperties(%NamesAndValues);
  59 
  60   return $This;
  61 }
  62 
  63 # Initialize any local object data...
  64 #
  65 sub _InitializeSDFileIO {
  66   my($This) = @_;
  67 
  68   # Sorting of MDL data fields during output: Keep the initial order or write 'em out alphabetically...
  69   $This->{SortDataFieldsDuringOutput} = 'No';
  70 
  71   return $This;
  72 }
  73 
  74 # Initialize class ...
  75 sub _InitializeClass {
  76   #Class name...
  77   $ClassName = __PACKAGE__;
  78 
  79 }
  80 
  81 # Initialize object values...
  82 sub _InitializeSDFileIOProperties {
  83   my($This, %NamesAndValues) = @_;
  84 
  85   # All other property names and values along with all Set/Get<PropertyName> methods
  86   # are implemented on-demand using ObjectProperty class.
  87 
  88   my($Name, $Value, $MethodName);
  89   while (($Name, $Value) = each  %NamesAndValues) {
  90     $MethodName = "Set${Name}";
  91     $This->$MethodName($Value);
  92   }
  93 
  94   if (!exists $NamesAndValues{Name}) {
  95     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  96   }
  97 
  98   # Make sure it's a SD file...
  99   $Name = $NamesAndValues{Name};
 100   if (!$This->IsSDFile($Name)) {
 101     croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be SDF format...";
 102   }
 103 
 104   return $This;
 105 }
 106 
 107 # Is it a SD file?
 108 sub IsSDFile ($;$) {
 109   my($FirstParameter, $SecondParameter) = @_;
 110   my($This, $FileName, $Status);
 111 
 112   if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
 113     ($This, $FileName) = ($FirstParameter, $SecondParameter);
 114   }
 115   else {
 116     $FileName = $FirstParameter;
 117   }
 118 
 119   # Check file extension...
 120   $Status = FileUtil::CheckFileType($FileName, "sd sdf");
 121 
 122   return $Status;
 123 }
 124 
 125 # Read molecule from file and return molecule object...
 126 sub ReadMolecule {
 127   my($This) = @_;
 128   my($FileHandle);
 129 
 130   $FileHandle = $This->GetFileHandle();
 131   return $This->ParseMoleculeString(SDFileUtil::ReadCmpdString($FileHandle));
 132 }
 133 
 134 # Write compound data along with any data field label and values using Molecule object...
 135 sub WriteMolecule {
 136   my($This, $Molecule) = @_;
 137 
 138   if (!(defined($Molecule) && $Molecule->IsMolecule())) {
 139     carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
 140     return $This;
 141   }
 142   my($FileHandle);
 143   $FileHandle = $This->GetFileHandle();
 144 
 145   print $FileHandle $This->GenerateMoleculeString($Molecule) . "\n";
 146 
 147   return $This;
 148 }
 149 
 150 # Retrieve molecule string...
 151 sub ReadMoleculeString {
 152   my($This) = @_;
 153   my($FileHandle);
 154 
 155   $FileHandle = $This->GetFileHandle();
 156   return SDFileUtil::ReadCmpdString($FileHandle);
 157 }
 158 
 159 # Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class
 160 # method or a package function.
 161 #
 162 sub ParseMoleculeString {
 163   my($FirstParameter, $SecondParameter) = @_;
 164   my($This, $MoleculeString);
 165 
 166   if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
 167     ($This, $MoleculeString) = ($FirstParameter, $SecondParameter);
 168   }
 169   else {
 170     $MoleculeString = $FirstParameter;
 171     $This = undef;
 172   }
 173   if (!$MoleculeString) {
 174     return undef;
 175   }
 176   # Parse molecule data...
 177   my($Molecule);
 178   $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);
 179 
 180   # Process data label/value pairs...
 181   my(@MoleculeLines, @DataLabels, %DataLabelsAndValues);
 182 
 183   %DataLabelsAndValues = ();
 184   @MoleculeLines = split /\n/, $MoleculeString;
 185   @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
 186   %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);
 187 
 188   # Store reference to data labels to keep track of their initial order in SD file...
 189   $Molecule->SetDataFieldLabels(\@DataLabels);
 190 
 191   # Store reference to SD data label/value pairs hash as a generic property of molecule...
 192   $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);
 193 
 194   return $Molecule;
 195 }
 196 
 197 # Generate molecule string using molecule object...
 198 sub GenerateMoleculeString {
 199   my($FirstParameter, $SecondParameter) = @_;
 200   my($This, $Molecule);
 201 
 202   if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
 203     ($This, $Molecule) = ($FirstParameter, $SecondParameter);
 204   }
 205   else {
 206     $Molecule = $FirstParameter;
 207     $This = undef;
 208   }
 209   if (!defined($Molecule)) {
 210     return undef;
 211   }
 212   # Generate CTAB data...
 213   my($CmpdString);
 214   $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);
 215 
 216   # Generate any data field labels and values...
 217   my($DataFieldLabelsAndValuesString);
 218 
 219   $DataFieldLabelsAndValuesString = '';
 220   if ($Molecule->HasProperty('DataFieldLabels')) {
 221     my($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
 222 
 223     $SortDataFields = (exists($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0;
 224 
 225     $DataFieldLabelsRef = $Molecule->GetDataFieldLabels();
 226     $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 227     $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
 228   }
 229 
 230   return "${CmpdString }\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
 231 }
 232 
 233 
 234 # Is it a SDFileIO object?
 235 sub _IsSDFileIO {
 236   my($Object) = @_;
 237 
 238   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 239 }
 240