MayaChemTools

   1 package Graph::CyclesDetection;
   2 #
   3 # File: CyclesDetection.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Graph;
  30 use Graph::Path;
  31 use Graph::PathGraph;
  32 use BitVector;
  33 
  34 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  35 
  36 @ISA = qw(Exporter);
  37 @EXPORT = qw();
  38 @EXPORT_OK = qw();
  39 
  40 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  41 
  42 # Setup class variables...
  43 my($ClassName);
  44 _InitializeClass();
  45 
  46 # Overload Perl functions...
  47 use overload '""' => 'StringifyCyclesDetection';
  48 
  49 # Class constructor...
  50 sub new {
  51   my($Class, $Graph) = @_;
  52 
  53   # Initialize object...
  54   my $This = {};
  55   bless $This, ref($Class) || $Class;
  56   $This->_InitializeCyclesDetection($Graph);
  57 
  58   return $This;
  59 }
  60 
  61 # Initialize object data...
  62 sub _InitializeCyclesDetection {
  63   my($This, $Graph) = @_;
  64 
  65   $This->{Graph} = $Graph;
  66 
  67   # Cycles list...
  68   @{$This->{AllCyclicPaths}} = ();
  69 
  70   # Cyclic paths which are not part of any other cycle...
  71   @{$This->{IndependentCyclicPaths}} = ();
  72 
  73   return $This;
  74 }
  75 
  76 # Initialize class ...
  77 sub _InitializeClass {
  78   #Class name...
  79   $ClassName = __PACKAGE__;
  80 }
  81 
  82 # Detect all cycles in graph...
  83 #
  84 sub DetectCycles {
  85   my($This) = @_;
  86 
  87   return $This->DetectCyclesUsingCollapsingPathGraphMethodology();
  88 }
  89 
  90 # Detect all cycles in the graph using collapsing path graph [Ref 31] methodology...
  91 #
  92 # Note:
  93 #   . For topologically complex graphs containing large number of cycles,
  94 #     CollapseVertexAndCollectCyclicPathsDetectCycles method implemented in
  95 #     PathGraph can time out in which case no cycles are detected or
  96 #     assigned.
  97 #
  98 sub DetectCyclesUsingCollapsingPathGraphMethodology {
  99   my($This) = @_;
 100   my($PathGraph);
 101 
 102   # Create a path graph...
 103   $PathGraph = new Graph::PathGraph($This->{Graph});
 104 
 105   # For a vertex to be in a cycle, its degree must be >=2. So delete vertices recursively
 106   # till all vertices with degree less than 2 have been deleted...
 107   $PathGraph->DeleteVerticesWithDegreeLessThan(2);
 108 
 109   # Setup a VertexID and EdgeID to index map usage during retrieval of independent cycles to
 110   # avoid going over all vertices in all cylic paths later...
 111   #
 112   my($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex);
 113   ($VertexIDsToIndicesRef, $LargestVertexIndex) = $This->_SetupVertexIDsToIndicesMap($PathGraph);
 114   ($EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = $This->_SetupEdgeIDsToIndicesMap($PathGraph);
 115 
 116   # Recursively collapse vertices with lowest degree...
 117   my($VertexID, $CycleVertexID);
 118   while ($VertexID = $PathGraph->GetVertexWithSmallestDegree()) {
 119       if (!$PathGraph->CollapseVertexAndCollectCyclicPaths($VertexID)) {
 120         # Cycles detection didn't finish...
 121         return undef;
 122       }
 123   }
 124 
 125   # Get detected cycles and save 'em sorted by size...
 126   push @{$This->{AllCyclicPaths}}, sort { $a->GetLength() <=> $b->GetLength() } $PathGraph->GetCyclicPaths();
 127 
 128   # Retrieve independent cyclic paths...
 129   return $This->_RetrieveIndependentCycles($VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex);
 130 }
 131 
 132 # Retrieve and save independent cyclic paths...
 133 #
 134 # Set of independent cycles identified using this method doesn't correspond to basis set of
 135 # rings or smallest set of smallest rings (SSSR) [ Refs 29-30 ]; instead, set of cycles identified
 136 # as independent cycles simply correspond to cycles which contain no other cycle as their
 137 # subcycles and can't be described as linear combination of smaller cycles. And it also happen
 138 # to contain all the rings in basis set of rings and SSSR. In other words, it's a superset of basis set
 139 # of cycles and SSSR. For example, six four membered cycles are identified for cubane which is one
 140 # more than the basis set of cycles.
 141 #
 142 sub _RetrieveIndependentCycles {
 143   my($This, $VertexIDsToIndicesRef, $LargestVertexIndex, $EdgeIDsToIndicesRef, $LargestEdgeIDIndex) = @_;
 144 
 145   # Only 1 or 0 cyclic paths...
 146   if (@{$This->{AllCyclicPaths}} <= 1) {
 147     push @{$This->{IndependentCyclicPaths}}, @{$This->{AllCyclicPaths}};
 148     return $This;
 149   }
 150 
 151   # Setup bit vectors for each cyclic path with size of each bit vector corresponding to
 152   # maximum vertex index plus one...
 153   my($CyclicPath, $BitVector, @BitNums, @CyclicPathBitVectors, @CyclicPathEdgeIDsBitVectors);
 154 
 155   @CyclicPathBitVectors = (); @CyclicPathEdgeIDsBitVectors = ();
 156 
 157   # Set bits corresponding to VertexIDs EdgeIDs using their indices...
 158   for $CyclicPath (@{$This->{AllCyclicPaths}}) {
 159     $BitVector = new BitVector($LargestVertexIndex);
 160     @BitNums = map { $VertexIDsToIndicesRef->{$_} } $CyclicPath->GetVertices();
 161     $BitVector->SetBits(@BitNums);
 162     push @CyclicPathBitVectors, $BitVector;
 163 
 164     $BitVector = new BitVector($LargestEdgeIDIndex);
 165     @BitNums = map { $EdgeIDsToIndicesRef->{$_} } $This->_GetPathEdgeIDs($CyclicPath);
 166     $BitVector->SetBits(@BitNums);
 167     push @CyclicPathEdgeIDsBitVectors, $BitVector;
 168   }
 169 
 170   # First smallest cyclic path always ends up as an independent cyclic path...
 171   push @{$This->{IndependentCyclicPaths}}, $This->{AllCyclicPaths}[0];
 172 
 173   # Retrieve other independent cyclic paths...
 174   my($CurrentIndex, $PreviousIndex, $CurrentCyclicPath, $PreviousCyclicPath, $CurrentPathLength, $PreviousPathLength, $CurrentBitVector, $PreviousBitVector, $CurrentAndPreviousBitVectpor, $AllPreviousSmallerPathsBitVector, $AllPreviousSmallerPathsEdgeIDsBitVector, $CurrentEdgeIDsBitVector, $AndBitVector, %SmallerPathAlreadyAdded, %SkipPath);
 175 
 176   %SkipPath = ();
 177   %SmallerPathAlreadyAdded = ();
 178   $AllPreviousSmallerPathsBitVector = new BitVector($LargestVertexIndex);
 179   $AllPreviousSmallerPathsEdgeIDsBitVector = new BitVector($LargestEdgeIDIndex);
 180 
 181   CURRENT: for $CurrentIndex (1 .. $#{$This->{AllCyclicPaths}}) {
 182     if (exists $SkipPath{$CurrentIndex}) {
 183       next CURRENT;
 184     }
 185     $CurrentCyclicPath = $This->{AllCyclicPaths}[$CurrentIndex];
 186     $CurrentBitVector = $CyclicPathBitVectors[$CurrentIndex];
 187     $CurrentPathLength = $CurrentCyclicPath->GetLength();
 188 
 189     PREVIOUS: for $PreviousIndex (0 .. ($CurrentIndex - 1)) {
 190       if (exists $SkipPath{$PreviousIndex}) {
 191         next PREVIOUS;
 192       }
 193       $PreviousCyclicPath = $This->{AllCyclicPaths}[$PreviousIndex];
 194       $PreviousBitVector = $CyclicPathBitVectors[$PreviousIndex];
 195 
 196       # Is previous path a subset of current path?
 197       $CurrentAndPreviousBitVectpor = $PreviousBitVector &  $CurrentBitVector;
 198       if ($PreviousBitVector->GetNumOfSetBits() == $CurrentAndPreviousBitVectpor->GetNumOfSetBits()) {
 199         # Current path doesn't qualify an independent path...
 200         $SkipPath{$CurrentIndex} = 1;
 201         next CURRENT;
 202       }
 203 
 204       $PreviousPathLength = $PreviousCyclicPath->GetLength();
 205       if ($PreviousPathLength < $CurrentPathLength) {
 206         # Mark cycle vertices seen in cyclic paths with length smaller than current path...
 207         if (! exists $SmallerPathAlreadyAdded{$PreviousIndex}) {
 208           $SmallerPathAlreadyAdded{$PreviousIndex} = 1;
 209           $AllPreviousSmallerPathsBitVector = $AllPreviousSmallerPathsBitVector | $PreviousBitVector;
 210           $AllPreviousSmallerPathsEdgeIDsBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector | $CyclicPathEdgeIDsBitVectors[$PreviousIndex];
 211         }
 212       }
 213     }
 214     if ($AllPreviousSmallerPathsBitVector->GetNumOfSetBits()) {
 215       # Is current path a linear combination of smaller paths?
 216       $AndBitVector = $AllPreviousSmallerPathsBitVector &  $CurrentBitVector;
 217       if ($CurrentBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) {
 218         # Are all edges in the current path already present in smaller paths?
 219         $CurrentEdgeIDsBitVector = $CyclicPathEdgeIDsBitVectors[$CurrentIndex];
 220         $AndBitVector = $AllPreviousSmallerPathsEdgeIDsBitVector &  $CurrentEdgeIDsBitVector;
 221 
 222         if ($CurrentEdgeIDsBitVector->GetNumOfSetBits() == $AndBitVector->GetNumOfSetBits()) {
 223           # Current path doesn't qualify an independent path...
 224           $SkipPath{$CurrentIndex} = 1;
 225           next CURRENT;
 226         }
 227       }
 228     }
 229     # It's an independent cyclic path...
 230     push @{$This->{IndependentCyclicPaths}}, $CurrentCyclicPath;
 231   }
 232   return $This;
 233 }
 234 
 235 # Setup a VertexID to index map...
 236 #
 237 sub _SetupVertexIDsToIndicesMap {
 238   my($This, $PathGraph) = @_;
 239   my($LargestVertexIndex, @VertexIDs, %VertexIDsMap);
 240 
 241   %VertexIDsMap = (); @VertexIDs = (); $LargestVertexIndex = 0;
 242 
 243   @VertexIDs = $PathGraph->GetVertices();
 244   if (! @VertexIDs) {
 245     return (\%VertexIDsMap, $LargestVertexIndex);
 246   }
 247   @VertexIDsMap{ @VertexIDs } = (0 .. $#VertexIDs);
 248   $LargestVertexIndex = scalar @VertexIDs;
 249 
 250   return (\%VertexIDsMap, $LargestVertexIndex);
 251 }
 252 
 253 # Setup a Edge to index map using paths associated to egdes in an intial
 254 # path graph...
 255 #
 256 sub _SetupEdgeIDsToIndicesMap {
 257   my($This, $PathGraph) = @_;
 258   my($Path, $LargestEdgeIndex, @EdgeIDs, %EdgeIDsMap);
 259 
 260   %EdgeIDsMap = (); @EdgeIDs = (); $LargestEdgeIndex = 0;
 261 
 262   @EdgeIDs = ();
 263   for $Path ($PathGraph->GetPaths()) {
 264     push @EdgeIDs, $This->_GetPathEdgeIDs($Path);
 265   }
 266 
 267   if (! @EdgeIDs) {
 268     return (\%EdgeIDsMap, $LargestEdgeIndex);
 269   }
 270 
 271   @EdgeIDsMap{ @EdgeIDs } = (0 .. $#EdgeIDs);
 272   $LargestEdgeIndex = scalar @EdgeIDs;
 273 
 274   return (\%EdgeIDsMap, $LargestEdgeIndex);
 275 }
 276 
 277 # Get path edge IDs or number of edges. The edge IDs are generated from
 278 # edge vertices and correpond to VertexID1-VertexID2 where VertexID1 <=
 279 # VertexID2.
 280 #
 281 sub _GetPathEdgeIDs {
 282   my($This, $Path) = @_;
 283   my(@EdgesVertexIDs, @EdgeIDs);
 284 
 285   @EdgesVertexIDs = (); @EdgeIDs = ();
 286   @EdgesVertexIDs = $Path->GetEdges();
 287   if (!@EdgesVertexIDs) {
 288     return wantarray ? @EdgeIDs : (scalar @EdgeIDs);
 289   }
 290 
 291   # Set up edge IDs...
 292   my($Index, $VertexID1, $VertexID2, $EdgeID);
 293 
 294   for ($Index = 0; $Index < $#EdgesVertexIDs; $Index += 2) {
 295     $VertexID1 = $EdgesVertexIDs[$Index]; $VertexID2 = $EdgesVertexIDs[$Index + 1];
 296     $EdgeID = ($VertexID1 <= $VertexID2) ? "$VertexID1-$VertexID2" : "$VertexID2-$VertexID1";
 297     push @EdgeIDs, $EdgeID;
 298   }
 299 
 300   return wantarray ? @EdgeIDs : (scalar @EdgeIDs);
 301 }
 302 
 303 # Return an array containing references to cyclic paths or number of cylic paths...
 304 #
 305 sub GetAllCyclicPaths {
 306   my($This) = @_;
 307 
 308   return wantarray ? @{$This->{AllCyclicPaths}} : scalar @{$This->{AllCyclicPaths}};
 309 }
 310 
 311 # Get cyclic paths which are independent. In otherwords, cycles which don't contain any other
 312 # cycle as their subset...
 313 #
 314 sub GetIndependentCyclicPaths {
 315   my($This) = @_;
 316 
 317   return wantarray ? @{$This->{IndependentCyclicPaths}} : scalar @{$This->{IndependentCyclicPaths}};
 318 }
 319 
 320 # Return a string containg data for CyclesDetection object...
 321 sub StringifyCyclesDetection {
 322   my($This) = @_;
 323   my($CyclesDetectionString, $CyclesCount, $CyclicPath);
 324 
 325   $CyclesCount = @{$This->{AllCyclicPaths}};
 326   $CyclesDetectionString = "AllCycles: Count - $CyclesCount";
 327   for $CyclicPath (@{$This->{AllCyclicPaths}}) {
 328     $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices());
 329   }
 330 
 331   $CyclesCount = @{$This->{IndependentCyclicPaths}};
 332   $CyclesDetectionString .= "\nIndependentCycles: Count - $CyclesCount";
 333   for $CyclicPath (@{$This->{IndependentCyclicPaths}}) {
 334     $CyclesDetectionString .= "; Cycle: " . join('-', $CyclicPath->GetVertices());
 335   }
 336 
 337   return $CyclesDetectionString;
 338 }
 339 
 340 # Return a reference to new cycle detection object...
 341 sub Copy {
 342   my($This) = @_;
 343   my($NewCyclesDetection);
 344 
 345   $NewCyclesDetection = Storable::dclone($This);
 346 
 347   return $NewCyclesDetection;
 348 }
 349