Made stats use multiple files to reduce memory usage
author Dan
Sat, 11 Oct 2008 20:28:32 -0400
changeset 15 5e2d1514ccd0
parent 14 506a7e0fb106
child 16 01c6a003d762
Made stats use multiple files to reduce memory usage
htdocs/index.php
modules/stats.php
stats-fe.php
stats/split-stats.php
--- a/htdocs/index.php	Sat Oct 11 23:09:13 2008 +0000
+++ b/htdocs/index.php	Sat Oct 11 20:28:32 2008 -0400
@@ -26,7 +26,7 @@
         <?php
         $tz_display = str_replace('_', ' ', str_replace('/', ': ', $tz));
         echo 'Time zone: ' . $tz_display . ' [<a href="changetz.php">change</a>]<br />';
-        echo '<small>The time now is ' . date('H:i:s') . '.<br />Statistics last written to disk at ' . date('H:i:s', filemtime('../stats-data.php')) . '.</small>';
+        echo '<small>The time now is ' . date('H:i:s') . '.<br />Statistics last written to disk at ' . date('H:i:s', stats_last_updated()) . '.</small>';
         ?>
       </p>
       <p>
--- a/modules/stats.php	Sat Oct 11 23:09:13 2008 +0000
+++ b/modules/stats.php	Sat Oct 11 20:28:32 2008 -0400
@@ -8,7 +8,8 @@
   'v' => '+'
 );
 $stats_data = array('anonymous' => array(), 'messages' => array());
-@include('./stats-data.php');
+$stats_day = gmdate('Ymd');
+@include("./stats/stats-data-$stats_day.php");
 unset($stats_data['members']);
 $stats_data['members'] =& $stats_memberlist;
 
@@ -232,6 +233,9 @@
 
 function stats_cron()
 {
+  global $stats_day;
+  $stats_day = gmdate('Ymd');
+  
   static $commit_time = 0;
   $now = time();
   // commit to disk every 1 minute
@@ -244,13 +248,13 @@
 
 function stats_commit()
 {
-  global $stats_data;
+  global $stats_data, $stats_day;
   ob_start();
   var_export($stats_data);
   $stats_data_exported = ob_get_contents();
   ob_end_clean();
   
-  $fp = @fopen('./stats-data.php', 'w');
+  $fp = @fopen("./stats/stats-data-$stats_day.php", 'w');
   if ( !$fp )
     return false;
   fwrite($fp, "<?php\n\$stats_data = $stats_data_exported;\n");
--- a/stats-fe.php	Sat Oct 11 23:09:13 2008 +0000
+++ b/stats-fe.php	Sat Oct 11 20:28:32 2008 -0400
@@ -7,12 +7,10 @@
  * @author Dan Fuhry <dan@enanocms.org>
  */
 
-if ( !isset($GLOBALS['stats_data']) )
-{
-  require(dirname(__FILE__) . '/stats-data.php');
-  $data =& $stats_data;
-}
+$stats_merged_data = array('counts' => array(), 'messages' => array());
+$stats_data =& $stats_merged_data;
 
+define('ENANOBOT_ROOT', dirname(__FILE__));
 define('NOW', time());
 
 /**
@@ -25,18 +23,18 @@
 
 function stats_message_count($channel, $mins = 10, $base = NOW)
 {
-  global $data;
+  global $stats_merged_data;
   
   $time_min = $base - ( $mins * 60 );
   $time_max = $base;
   
-  if ( !isset($data['messages'][$channel]) )
+  if ( !isset($stats_merged_data['messages'][$channel]) )
   {
     return 0;
   }
   
   $count = 0;
-  foreach ( $data['messages'][$channel] as $message )
+  foreach ( $stats_merged_data['messages'][$channel] as $message )
   {
     if ( $message['time'] >= $time_min && $message['time'] <= $time_max )
     {
@@ -57,7 +55,7 @@
 
 function stats_activity_percent($channel, $mins = 10, $base = NOW)
 {
-  global $data;
+  global $stats_merged_data;
   if ( !($total = stats_message_count($channel, $mins, $base)) )
   {
     return array();
@@ -66,7 +64,7 @@
   $usercounts = array();
   $time_min = $base - ( $mins * 60 );
   $time_max = $base;
-  foreach ( $data['messages'][$channel] as $message )
+  foreach ( $stats_merged_data['messages'][$channel] as $message )
   {
     if ( $message['time'] >= $time_min && $message['time'] <= $time_max )
     {
@@ -82,3 +80,84 @@
   arsort($results);
   return $results;
 }
+
+/**
+ * Loads X days of statistics, minimum: the file for the current UTC day plus
+ * the files for the $days days before it. Each file that exists is included
+ * and its $stats_data array is handed to stats_merge(). Days without a file,
+ * and files that do not define a $stats_data array, are skipped.
+ * @param int Days to load, default is 1
+ */
+
+function load_stats_data($days = 1)
+{
+  // always read one day more than requested, so at least $days days are covered
+  $days++;
+  for ( $i = 0; $i < $days; $i++ )
+  {
+    $day = gmdate('Ymd', NOW - ( $i * 86400 ));
+    $file = ENANOBOT_ROOT . "/stats/stats-data-$day.php";
+    if ( !file_exists($file) )
+    {
+      continue;
+    }
+    // Forget the previous iteration's array first. A file that defines nothing
+    // (for example one caught empty while it is being rewritten) must not
+    // cause the previous day's data to be merged a second time.
+    unset($stats_data);
+    require($file);
+    if ( isset($stats_data) && is_array($stats_data) )
+    {
+      stats_merge($stats_data);
+    }
+  }
+}
+
+/**
+ * Reports when the statistics file for the current UTC day was last written.
+ * @return int Modification time of that file as a Unix timestamp, or 0 when the file does not exist
+ */
+
+function stats_last_updated()
+{
+  $today = gmdate('Ymd');
+  $path = ENANOBOT_ROOT . '/stats/stats-data-' . $today . '.php';
+  if ( !file_exists($path) )
+  {
+    // nothing has been written for today yet
+    return 0;
+  }
+  return filemtime($path);
+}
+
+/**
+ * Merges a newly loaded stats array with the current cache in RAM
+ * ($stats_merged_data). The 'counts' and 'messages' sections are both
+ * optional; a section that is missing or is not an array is skipped.
+ *
+ * Counts are merged per channel and per key: where the cached value is an
+ * integer the larger of the two numbers is kept, where both values are arrays
+ * they are combined with array_merge(), and keys the cache does not have yet
+ * are copied over. Messages are appended to the channel's cached list.
+ * @param array Data to merge
+ * @access private
+ */
+
+function stats_merge($data)
+{
+  global $stats_merged_data;
+  if ( !is_array($data) )
+  {
+    return;
+  }
+
+  if ( isset($data['counts']) && is_array($data['counts']) )
+  {
+    foreach ( $data['counts'] as $channel => $chaninfo )
+    {
+      if ( !isset($stats_merged_data['counts'][$channel]) )
+      {
+        // first time this channel is seen: take its counters as they are
+        $stats_merged_data['counts'][$channel] = $chaninfo;
+        continue;
+      }
+      if ( !is_array($chaninfo) )
+      {
+        continue;
+      }
+      // walk the incoming keys (not the cached ones) so that keys which only
+      // exist in the new data are kept, and no missing index is ever read
+      foreach ( $chaninfo as $key => $incoming )
+      {
+        if ( !isset($stats_merged_data['counts'][$channel][$key]) )
+        {
+          $stats_merged_data['counts'][$channel][$key] = $incoming;
+          continue;
+        }
+        $current = $stats_merged_data['counts'][$channel][$key];
+        if ( is_int($current) && is_numeric($incoming) )
+        {
+          $stats_merged_data['counts'][$channel][$key] = max($current, $incoming);
+        }
+        else if ( is_array($current) && is_array($incoming) )
+        {
+          $stats_merged_data['counts'][$channel][$key] = array_merge($current, $incoming);
+        }
+      }
+    }
+  }
+
+  if ( isset($data['messages']) && is_array($data['messages']) )
+  {
+    foreach ( $data['messages'] as $channel => $chandata )
+    {
+      if ( !isset($stats_merged_data['messages'][$channel]) )
+      {
+        $stats_merged_data['messages'][$channel] = $chandata;
+        continue;
+      }
+      foreach ( $chandata as $message )
+      {
+        $stats_merged_data['messages'][$channel][] = $message;
+      }
+    }
+  }
+}
+
+// Fill the merged statistics cache as soon as this file is included, using
+// load_stats_data()'s default argument of one day.
+load_stats_data();
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stats/split-stats.php	Sat Oct 11 20:28:32 2008 -0400
@@ -0,0 +1,85 @@
+<?php
+
+// Each stats file is loaded whole by split_stats_file(), so lift the execution
+// time limit and raise the memory limit before starting.
+@set_time_limit(0);
+@ini_set('memory_limit', '256M');
+
+// Take a snapshot of the matching files before splitting anything: the
+// per-day files written by split_stats_file() match the same pattern and are
+// created in this directory, so they must not show up in a listing that is
+// still being read.
+$split_queue = array();
+if ( $dir = @opendir('.') )
+{
+  // compare against false explicitly; an entry named "0" is falsy and would
+  // otherwise end the loop early
+  while ( false !== ( $dh = readdir($dir) ) )
+  {
+    if ( preg_match('/^stats-data(-[0-9]+)\.php$/', $dh) )
+    {
+      $split_queue[] = $dh;
+    }
+  }
+  closedir($dir);
+}
+
+foreach ( $split_queue as $dh )
+{
+  split_stats_file($dh);
+}
+
+/**
+ * Splits one stats file into one file per UTC day. The file is included and
+ * must define a $stats_data array. Its messages are grouped by the day of
+ * their 'time' value; the 'counts' and 'anonymous' sections, when present,
+ * are copied unchanged into every day. Each day is written to
+ * stats-data-YYYYMMDD.php in the current directory with write_stats_file().
+ *
+ * The source file is deleted only after every per-day file has been written,
+ * and never when it is itself one of the files just written. If nothing was
+ * written (no messages) or a write fails, the source file is left in place.
+ * @param string File to split
+ * @return bool False if the file defines no $stats_data array or an output file could not be written, true otherwise
+ */
+function split_stats_file($file)
+{
+  echo "loading $file";
+
+  require($file);
+  if ( !isset($stats_data) || !is_array($stats_data) )
+  {
+    return false;
+  }
+
+  // resolved now so it can be compared with the output files further down
+  $source = realpath($file);
+
+  echo "\rprocessing $file\n";
+
+  $newdata = array();
+  if ( isset($stats_data['messages']) && is_array($stats_data['messages']) )
+  {
+    foreach ( $stats_data['messages'] as $channel => &$chandata )
+    {
+      echo "  processing channel $channel\n";
+      foreach ( $chandata as $i => $message )
+      {
+        $message_day = gmdate('Ymd', $message['time']);
+        if ( !isset($newdata[$message_day]) )
+        {
+          echo "\r    processing " . gmdate('Y-m-d', $message['time']);
+          $newdata[$message_day] = array(
+            'messages' => array()
+          );
+          if ( isset($stats_data['counts']) )
+          {
+            $newdata[$message_day]['counts'] = $stats_data['counts'];
+          }
+          if ( isset($stats_data['anonymous']) )
+          {
+            $newdata[$message_day]['anonymous'] = $stats_data['anonymous'];
+          }
+        }
+        if ( !isset($newdata[$message_day]['messages'][$channel]) )
+        {
+          $newdata[$message_day]['messages'][$channel] = array();
+        }
+        $newdata[$message_day]['messages'][$channel][] = $message;
+        // drop the message from the source array once it has been filed
+        unset($chandata[$i]);
+      }
+      echo "\n";
+    }
+    // break the reference so later code cannot write into the last channel
+    unset($chandata);
+  }
+
+  $written = array();
+  foreach ( $newdata as $date => $data )
+  {
+    echo "\r  writing output for $date";
+    $outfile = "stats-data-$date.php";
+    if ( write_stats_file($outfile, $data) === false )
+    {
+      echo "\n  could not write $outfile, keeping $file\n";
+      return false;
+    }
+    $written[] = realpath($outfile);
+  }
+  echo "\n";
+
+  // Remove the source only now that its contents are safely on disk, and only
+  // if it is not one of the per-day files that were just written.
+  if ( !empty($written) && $source !== false && !in_array($source, $written, true) )
+  {
+    unlink($file);
+  }
+  return true;
+}
+
+/**
+ * Writes a stats array to a PHP file that, when included, defines
+ * $stats_data with the same contents.
+ * @param string File to write; an existing file is overwritten
+ * @param array Data to export
+ * @return bool True if the complete contents were written, false if the file could not be opened or the write was incomplete
+ */
+function write_stats_file($file, $data)
+{
+  // Build the complete contents before opening the target: opening with 'w'
+  // truncates the file, so the export should already exist by then.
+  $contents = "<?php\n\$stats_data = " . var_export($data, true) . ";\n";
+
+  $fp = @fopen($file, 'w');
+  if ( !$fp )
+    return false;
+
+  $bytes = fwrite($fp, $contents);
+  fclose($fp);
+
+  // fwrite() returns false on error and may write fewer bytes than asked for
+  return ( $bytes === strlen($contents) );
+}