334 lines
11 KiB
PHP
Executable file
334 lines
11 KiB
PHP
Executable file
<?php
|
|
/**
 * Piwik - Open source web analytics
 *
 * @link http://piwik.org
 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
 *
 */
|
|
namespace Piwik\Plugins\PrivacyManager;
|
|
|
|
use Piwik\Common;
|
|
use Piwik\Date;
|
|
use Piwik\Db;
|
|
use Piwik\Log;
|
|
use Piwik\Piwik;
|
|
|
|
/**
 * Purges old visit data from the log_visit, log_conversion and related log tables.
 */
|
|
class LogDataPurger
{
    /**
     * Unprefixed name of the temporary table that collects the idaction values
     * still referenced by log rows while unused actions are being purged.
     */
    const TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';

    /**
     * The max set of rows each table scan select should query at one time.
     */
    public static $selectSegmentSize = 100000;

    /**
     * The number of days after which log entries are considered old.
     */
    private $deleteLogsOlderThan;

    /**
     * The number of rows to delete per DELETE query.
     */
    private $maxRowsToDeletePerQuery;

    /**
     * Constructor.
     *
     * @param int $deleteLogsOlderThan The number of days after which log entries are considered old.
     *                                 Visits and related data whose age is greater than this number
     *                                 will be purged.
     * @param int $maxRowsToDeletePerQuery The maximum number of rows to delete in one query. Used to
     *                                     make sure log tables aren't locked for too long.
     */
    public function __construct($deleteLogsOlderThan, $maxRowsToDeletePerQuery)
    {
        $this->deleteLogsOlderThan = $deleteLogsOlderThan;
        $this->maxRowsToDeletePerQuery = $maxRowsToDeletePerQuery;
    }

    /**
     * Purges old data from the following tables:
     * - log_visit
     * - log_link_visit_action
     * - log_conversion
     * - log_conversion_item
     * - log_action
     *
     * Rows of the visit-keyed tables are removed when their idvisit is less than or
     * equal to the newest "old" visit. Unused log_action rows are only removed when
     * the LOCK TABLES privilege is available; otherwise a warning is logged and that
     * step is skipped. Nothing happens when no visit is old enough to be purged.
     */
    public function purgeData()
    {
        $maxIdVisit = $this->getDeleteIdVisitOffset();

        // break if no ID was found (nothing to delete for given period)
        if (empty($maxIdVisit)) {
            return;
        }

        $logTables = self::getDeleteTableLogTables();
        $logActionTable = Common::prefixTable('log_action');

        // delete data from log tables
        $where = "WHERE idvisit <= ?";
        foreach ($logTables as $logTable) {
            // deleting from log_action must be handled differently, so we do it later
            if ($logTable !== $logActionTable) {
                Db::deleteAllRows($logTable, $where, "idvisit ASC", $this->maxRowsToDeletePerQuery, array($maxIdVisit));
            }
        }

        // delete unused actions from the log_action table (but only if we can lock tables)
        if (Db::isLockPrivilegeGranted()) {
            $this->purgeUnusedLogActions();
        } else {
            $logMessage = get_class($this) . ": LOCK TABLES privilege not granted; skipping unused actions purge";
            Log::warning($logMessage);
        }

        // optimize table overhead after deletion
        Db::optimizeTables($logTables);
    }

    /**
     * Returns an array describing what data would be purged if purging were invoked.
     *
     * This function returns an array that maps table names with the number of rows
     * that will be deleted. Tables from which nothing would be deleted are omitted,
     * and log_action is never included.
     *
     * @return array
     */
    public function getPurgeEstimate()
    {
        $result = array();

        // deal w/ log tables that will be purged
        $maxIdVisit = $this->getDeleteIdVisitOffset();
        if (empty($maxIdVisit)) {
            return $result;
        }

        $logActionTable = Common::prefixTable('log_action');
        foreach (self::getDeleteTableLogTables() as $table) {
            // getting an estimate for log_action is not supported since it can take too long
            if ($table === $logActionTable) {
                continue;
            }

            $rowCount = $this->getLogTableDeleteCount($table, $maxIdVisit);
            if ($rowCount > 0) {
                $result[$table] = $rowCount;
            }
        }

        return $result;
    }

    /**
     * Safely delete all unused log_action rows.
     *
     * The ids of all actions that are still referenced are first collected in a
     * temporary table; every log_action row that is not in that table is then deleted.
     * The temporary table is dropped again before returning, whether or not the purge
     * succeeded, so that a later purge on the same connection can recreate it.
     *
     * @throws \Exception rethrows any failure raised while collecting or deleting.
     */
    private function purgeUnusedLogActions()
    {
        $this->createTempTable();

        $failure = null;
        try {
            // get current max ID in log tables w/ idaction references.
            $maxIds = $this->getMaxIdsInLogTables();

            // do large insert (inserting everything before maxIds) w/o locking tables...
            $this->insertActionsToKeep($maxIds, true);

            // ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
            $this->deleteUnusedActionsWhileLocked($maxIds);
        } catch (\Exception $e) {
            // remembered so the temporary table can be cleaned up before rethrowing
            $failure = $e;
        }

        $this->dropTempTable();

        if ($failure !== null) {
            throw $failure;
        }
    }

    /**
     * Locks the log tables, records the actions referenced by rows added since the
     * unlocked pass and deletes every unused action. The tables are always unlocked
     * again, including when one of the locked steps fails.
     *
     * @param array $maxIds The id snapshot returned by getMaxIdsInLogTables().
     * @throws \Exception rethrows any failure raised while the tables were locked.
     */
    private function deleteUnusedActionsWhileLocked($maxIds)
    {
        $this->lockLogTables();

        try {
            $this->insertActionsToKeep($maxIds, false);

            // delete before unlocking tables so there's no chance a new log row that references an
            // unused action will be inserted.
            $this->deleteUnusedActions();
        } catch (\Exception $e) {
            // never leave the log tables locked behind a failed purge
            Db::unlockAllTables();
            throw $e;
        }

        Db::unlockAllTables();
    }

    /**
     * Get the highest idvisit to delete rows from.
     *
     * @return string|bool The id of the newest visit whose last action is older than the
     *                     configured number of days, or false when log_visit is empty.
     *                     NOTE(review): the value returned when no visit is old enough is
     *                     whatever Db::segmentedFetchFirst() yields for "no match"; callers
     *                     here only test it with empty().
     */
    private function getDeleteIdVisitOffset()
    {
        $logVisit = Common::prefixTable("log_visit");

        // get max idvisit
        $maxIdVisit = Db::fetchOne("SELECT MAX(idvisit) FROM $logVisit");
        if (empty($maxIdVisit)) {
            return false;
        }

        // select highest idvisit to delete from
        $dateStart = Date::factory("today")->subDay($this->deleteLogsOlderThan);
        $sql = "SELECT idvisit
                  FROM $logVisit
                 WHERE '" . $dateStart->toString('Y-m-d H:i:s') . "' > visit_last_action_time
                   AND idvisit <= ?
                   AND idvisit > ?
              ORDER BY idvisit DESC
                 LIMIT 1";

        // negative step: presumably walks the id range downwards from the max id in
        // segments of $selectSegmentSize -- verify against Db::segmentedFetchFirst().
        return Db::segmentedFetchFirst($sql, $maxIdVisit, 0, -self::$selectSegmentSize);
    }

    /**
     * Counts the rows of a log table that purgeData() would delete.
     *
     * @param string $table The prefixed name of a table that has an idvisit column.
     * @param int|string $maxIdVisit The highest idvisit that is going to be purged.
     * @return int
     */
    private function getLogTableDeleteCount($table, $maxIdVisit)
    {
        $sql = "SELECT COUNT(*) FROM $table WHERE idvisit <= ?";
        return (int)Db::fetchOne($sql, array($maxIdVisit));
    }

    /**
     * Creates the temporary table that holds the ids of the actions to keep.
     */
    private function createTempTable()
    {
        $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::TEMP_TABLE_NAME) . " (
                    idaction INT(11),
                    PRIMARY KEY (idaction)
                )";
        Db::query($sql);
    }

    /**
     * Drops the temporary table created by createTempTable(), if it exists.
     */
    private function dropTempTable()
    {
        Db::query("DROP TEMPORARY TABLE IF EXISTS " . Common::prefixTable(self::TEMP_TABLE_NAME));
    }

    /**
     * Returns the current maximum id of every log table that references actions.
     *
     * @return array Maps the unprefixed table name to the result of SELECT MAX(id column).
     */
    private function getMaxIdsInLogTables()
    {
        $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item');
        $idColumns = $this->getTableIdColumns();

        $result = array();
        foreach ($tables as $table) {
            $idCol = $idColumns[$table];
            $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
        }

        return $result;
    }

    /**
     * Copies the idaction values referenced by log rows into the temporary table.
     *
     * @param array $maxIds The id snapshot returned by getMaxIdsInLogTables().
     * @param bool $olderThan If true, scans the rows from id 0 up to the snapshot. If false,
     *                        scans the rows from the snapshot up to the table's current
     *                        maximum id; this pass is run while the log tables are locked.
     */
    private function insertActionsToKeep($maxIds, $olderThan = true)
    {
        $tempTableName = Common::prefixTable(self::TEMP_TABLE_NAME);

        $idColumns = $this->getTableIdColumns();
        foreach ($this->getIdActionColumns() as $table => $columns) {
            $idCol = $idColumns[$table];
            $prefixedTable = Common::prefixTable($table);

            // MAX() is NULL for an empty table. The bounds are cast to int because a NULL
            // lower bound bound into "$idCol >= ?" matches no row at all, which would skip
            // rows added to a table that was still empty when the snapshot was taken.
            if ($olderThan) {
                $start = 0;
                $finish = (int)$maxIds[$table];
            } else {
                $start = (int)$maxIds[$table];
                // queried once per table instead of once per column: this pass runs while
                // the log tables are locked, so the maximum cannot move in between.
                $finish = (int)Db::fetchOne("SELECT MAX($idCol) FROM " . $prefixedTable);
            }

            foreach ($columns as $col) {
                $select = "SELECT $col FROM " . $prefixedTable . " WHERE $idCol >= ? AND $idCol < ?";
                $sql = "INSERT IGNORE INTO $tempTableName $select";

                Db::segmentedQuery($sql, $start, $finish, self::$selectSegmentSize);
            }
        }

        // allow code to be executed after data is inserted. for concurrency testing purposes.
        if ($olderThan) {
            /**
             * @ignore
             */
            Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.olderThan");
        } else {
            /**
             * @ignore
             */
            Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.newerThan");
        }
    }

    /**
     * Locks the tables that reference actions for reading and log_action for writing.
     */
    private function lockLogTables()
    {
        $readLocks = Common::prefixTables(
            'log_conversion',
            'log_link_visit_action',
            'log_visit',
            'log_conversion_item'
        );
        $writeLocks = Common::prefixTables('log_action');

        Db::lockTables($readLocks, $writeLocks);
    }

    /**
     * Deletes every log_action row whose idaction is not in the temporary table.
     */
    private function deleteUnusedActions()
    {
        list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::TEMP_TABLE_NAME);

        $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable
                        FROM $logActionTable
                   LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction
                       WHERE tmp.idaction IS NULL";

        Db::query($deleteSql);
    }

    /**
     * Returns, per unprefixed log table, the columns that reference log_action.idaction.
     *
     * @return array
     */
    private function getIdActionColumns()
    {
        return array(
            'log_link_visit_action' => array('idaction_url',
                                             'idaction_url_ref',
                                             'idaction_name',
                                             'idaction_name_ref',
                                             'idaction_event_category',
                                             'idaction_event_action'
            ),

            'log_conversion'        => array('idaction_url'),

            'log_visit'             => array('visit_exit_idaction_url',
                                             'visit_exit_idaction_name',
                                             'visit_entry_idaction_url',
                                             'visit_entry_idaction_name'),

            'log_conversion_item'   => array('idaction_sku',
                                             'idaction_name',
                                             'idaction_category',
                                             'idaction_category2',
                                             'idaction_category3',
                                             'idaction_category4',
                                             'idaction_category5')
        );
    }

    /**
     * Returns, per unprefixed log table, the id column used to scan it in segments.
     *
     * @return array
     */
    private function getTableIdColumns()
    {
        return array(
            'log_link_visit_action' => 'idlink_va',
            'log_conversion'        => 'idvisit',
            'log_visit'             => 'idvisit',
            'log_conversion_item'   => 'idvisit'
        );
    }

    /**
     * Returns the prefixed names of the log tables that are purged.
     *
     * The list is hardcoded, since these are not dynamically created tables.
     * log_action is only included when the LOCK TABLES privilege is granted.
     *
     * @return array
     */
    public static function getDeleteTableLogTables()
    {
        $result = Common::prefixTables('log_conversion',
                                       'log_link_visit_action',
                                       'log_visit',
                                       'log_conversion_item');
        if (Db::isLockPrivilegeGranted()) {
            $result[] = Common::prefixTable('log_action');
        }
        return $result;
    }

    /**
     * Utility function. Creates a new instance of LogDataPurger with the supplied array
     * of settings.
     *
     * $settings must contain values for the following keys:
     * - 'delete_logs_older_than': The number of days after which log entries are considered
     *                             old.
     * - 'delete_logs_max_rows_per_query': Max number of rows to DELETE in one query.
     *
     * @param array $settings Array of settings
     * @param bool $useRealTable Not used by this method; kept so existing callers keep working.
     * @return \Piwik\Plugins\PrivacyManager\LogDataPurger
     */
    public static function make($settings, $useRealTable = false)
    {
        return new self(
            $settings['delete_logs_older_than'],
            $settings['delete_logs_max_rows_per_query']
        );
    }
}
|