questlab/www/analytics/plugins/PrivacyManager/PrivacyManager.php
2014-05-15 12:35:28 +02:00

469 lines
17 KiB
PHP

<?php
/**
* Piwik - Open source web analytics
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\Plugins\PrivacyManager;
use Exception;
use Piwik\Common;
use Piwik\Config as PiwikConfig;
use Piwik\DataTable\DataTableInterface;
use Piwik\Date;
use Piwik\Db;
use Piwik\Menu\MenuAdmin;
use Piwik\Metrics;
use Piwik\Option;
use Piwik\Period\Range;
use Piwik\Period;
use Piwik\Piwik;
use Piwik\Plugins\Goals\Archiver;
use Piwik\ScheduledTask;
use Piwik\ScheduledTime;
use Piwik\Site;
use Piwik\Tracker\GoalManager;
require_once PIWIK_INCLUDE_PATH . '/plugins/PrivacyManager/LogDataPurger.php';
require_once PIWIK_INCLUDE_PATH . '/plugins/PrivacyManager/ReportsPurger.php';
/**
* Specifically include this for Tracker API (which does not use autoloader)
*/
require_once PIWIK_INCLUDE_PATH . '/plugins/PrivacyManager/DoNotTrackHeaderChecker.php';
require_once PIWIK_INCLUDE_PATH . '/plugins/PrivacyManager/IPAnonymizer.php';
/**
 * Plugin entry point for Piwik's privacy features.
 *
 * It registers the tracker hooks handled by DoNotTrackHeaderChecker and IPAnonymizer,
 * adds the privacy settings page to the admin menu and schedules the two daily tasks
 * that purge old log data and old archived report data.
 */
class PrivacyManager extends \Piwik\Plugin
{
    const OPTION_LAST_DELETE_PIWIK_LOGS = "lastDelete_piwik_logs";
    const OPTION_LAST_DELETE_PIWIK_REPORTS = 'lastDelete_piwik_reports';
    const OPTION_LAST_DELETE_PIWIK_LOGS_INITIAL = "lastDelete_piwik_logs_initial";

    // options for data purging feature array[configName => configSection]
    public static $purgeDataOptions = array(
        'delete_logs_enable'                   => 'Deletelogs',
        'delete_logs_schedule_lowest_interval' => 'Deletelogs',
        'delete_logs_older_than'               => 'Deletelogs',
        'delete_logs_max_rows_per_query'       => 'Deletelogs',
        'enable_auto_database_size_estimate'   => 'Deletelogs',
        'delete_reports_enable'                => 'Deletereports',
        'delete_reports_older_than'            => 'Deletereports',
        'delete_reports_keep_basic_metrics'    => 'Deletereports',
        'delete_reports_keep_day_reports'      => 'Deletereports',
        'delete_reports_keep_week_reports'     => 'Deletereports',
        'delete_reports_keep_month_reports'    => 'Deletereports',
        'delete_reports_keep_year_reports'     => 'Deletereports',
        'delete_reports_keep_range_reports'    => 'Deletereports',
        'delete_reports_keep_segment_reports'  => 'Deletereports',
    );

    /** @var DoNotTrackHeaderChecker Handler for the 'Tracker.isExcludedVisit' hook. */
    private $dntChecker = null;

    /** @var IPAnonymizer Handler for the 'Tracker.setVisitorIp' hook. */
    private $ipAnonymizer = null;

    /**
     * Constructor.
     *
     * Creates the helper objects up front because getListHooksRegistered() hands
     * them out as hook callbacks.
     */
    public function __construct()
    {
        parent::__construct();

        $this->dntChecker = new DoNotTrackHeaderChecker();
        $this->ipAnonymizer = new IPAnonymizer();
    }

    /**
     * Returns true if it is likely that the data for this report has been purged and if the
     * user should be told about that.
     *
     * In order for this function to return true, the following must also be true:
     * - The 'period' and 'date' request parameters are both present.
     * - The data table for this report must either be empty or not have been fetched.
     * - The period of this report is not a multiple period.
     * - The date of this report must be older than the delete_reports_older_than config option.
     *
     * @param DataTableInterface $dataTable
     * @return bool
     */
    public static function hasReportBeenPurged($dataTable)
    {
        $strPeriod = Common::getRequestVar('period', false);
        $strDate = Common::getRequestVar('date', false);

        // without a period and a date there is no report date to check
        if (false === $strPeriod || false === $strDate) {
            return false;
        }

        // only a missing (null) or empty data table can hint at purged data
        $hasNoData = is_null($dataTable)
            || (!empty($dataTable) && $dataTable->getRowsCount() == 0);
        if (!$hasNoData) {
            return false;
        }

        if ($strPeriod == 'range') {
            // if range, only look at the first date
            $idSite = Common::getRequestVar('idSite', '');
            if (intval($idSite) != 0) {
                $site = new Site($idSite);
                $timezone = $site->getTimezone();
            } else {
                $timezone = 'UTC';
            }

            $period = new Range('range', $strDate, $timezone);
            $reportDate = $period->getDateStart();
        } elseif (Period::isMultiplePeriod($strDate, $strPeriod)) {
            // if a multiple period, this function is irrelevant
            return false;
        } else {
            // otherwise, use the date as given
            $reportDate = Date::factory($strDate);
        }

        $reportYear = $reportDate->toString('Y');
        $reportMonth = $reportDate->toString('m');

        if (static::shouldReportBePurged($reportYear, $reportMonth)) {
            return true;
        }

        return false;
    }

    /**
     * @see Piwik\Plugin::getListHooksRegistered
     */
    public function getListHooksRegistered()
    {
        return array(
            'AssetManager.getJavaScriptFiles' => 'getJsFiles',
            'Menu.Admin.addItems'             => 'addMenu',
            'TaskScheduler.getScheduledTasks' => 'getScheduledTasks',
            'Tracker.setTrackerCacheGeneral'  => 'setTrackerCacheGeneral',
            'Tracker.isExcludedVisit'         => array($this->dntChecker, 'checkHeaderInTracker'),
            'Tracker.setVisitorIp'            => array($this->ipAnonymizer, 'setVisitorIpAddress'),
        );
    }

    /**
     * Hook handler for 'Tracker.setTrackerCacheGeneral': lets the plugin's Config
     * object update the general tracker cache content.
     *
     * @param array $cacheContent The cache content, replaced in place.
     */
    public function setTrackerCacheGeneral(&$cacheContent)
    {
        $config = new Config();
        $cacheContent = $config->setTrackerCacheGeneral($cacheContent);
    }

    /**
     * Hook handler for 'TaskScheduler.getScheduledTasks': appends the daily report
     * purge and log purge tasks.
     *
     * @param ScheduledTask[] $tasks The task list, appended to in place.
     */
    public function getScheduledTasks(&$tasks)
    {
        // both tasks are low priority so they will execute after most others, but not lowest, so
        // they will execute before the optimize tables task
        $purgeReportDataTask = new ScheduledTask(
            $this, 'deleteReportData', null, ScheduledTime::factory('daily'), ScheduledTask::LOW_PRIORITY
        );
        $tasks[] = $purgeReportDataTask;

        $purgeLogDataTask = new ScheduledTask(
            $this, 'deleteLogData', null, ScheduledTime::factory('daily'), ScheduledTask::LOW_PRIORITY
        );
        $tasks[] = $purgeLogDataTask;
    }

    /**
     * Hook handler for 'AssetManager.getJavaScriptFiles': adds this plugin's script.
     *
     * @param string[] $jsFiles The list of JavaScript files, appended to in place.
     */
    public function getJsFiles(&$jsFiles)
    {
        $jsFiles[] = "plugins/PrivacyManager/javascripts/privacySettings.js";
    }

    /**
     * Hook handler for 'Menu.Admin.addItems': adds the privacy settings entry to the
     * admin menu. The third argument passed to MenuAdmin::addEntry() is the result of
     * Piwik::isUserHasSomeAdminAccess().
     */
    public function addMenu()
    {
        MenuAdmin::addEntry(
            'PrivacyManager_MenuPrivacySettings',
            array('module' => 'PrivacyManager', 'action' => 'privacySettings'),
            Piwik::isUserHasSomeAdminAccess(),
            7 // menu order
        );
    }

    /**
     * Returns the settings for the data purging feature.
     *
     * Values are read from the INI config first. If Controller::isDataPurgeSettingsEnabled()
     * returns true, any value stored through Option overrides the INI value.
     *
     * @return array map of option name => value; an option missing from the INI config
     *               (and not overridden) maps to null
     */
    public static function getPurgeDataSettings()
    {
        $settings = array();

        // load settings from ini config
        $config = PiwikConfig::getInstance();
        foreach (self::$purgeDataOptions as $configKey => $configSection) {
            $values = $config->$configSection;
            // guard against a missing key so no undefined-index notice is raised
            $settings[$configKey] = isset($values[$configKey]) ? $values[$configKey] : null;
        }

        if (!Controller::isDataPurgeSettingsEnabled()) {
            return $settings;
        }

        // load the settings for the data purging settings
        foreach (self::$purgeDataOptions as $configName => $configSection) {
            $value = Option::get($configName);
            if ($value !== false) {
                $settings[$configName] = $value;
            }
        }

        return $settings;
    }

    /**
     * Saves the supplied data purging settings.
     *
     * Only keys listed in self::$purgeDataOptions are stored; other keys are ignored.
     *
     * @param array $settings The settings to save.
     */
    public static function savePurgeDataSettings($settings)
    {
        foreach (self::$purgeDataOptions as $configName => $configSection) {
            if (isset($settings[$configName])) {
                Option::set($configName, $settings[$configName]);
            }
        }
    }

    /**
     * Deletes old archived data (reports & metrics).
     *
     * Archive tables are not optimized after, as that is handled by a separate scheduled task
     * in CoreAdminHome. This is a scheduled task and will only execute every N days. The number
     * of days is determined by the delete_logs_schedule_lowest_interval config option.
     *
     * If delete_reports_enable is set to 1, old archive data is deleted. The following
     * config options can tweak this behavior:
     * - delete_reports_older_than: The number of months after which archive data is considered
     *                              old. The current month is not considered when applying this
     *                              value.
     * - delete_reports_keep_basic_metrics: If set to 1, keeps certain metric data (the names
     *                                      returned by getAllMetricsToKeep() are handed to
     *                                      the purger).
     * - delete_reports_keep_day_reports: If set to 1, keeps old daily reports.
     * - delete_reports_keep_week_reports: If set to 1, keeps old weekly reports.
     * - delete_reports_keep_month_reports: If set to 1, keeps old monthly reports.
     * - delete_reports_keep_year_reports: If set to 1, keeps old yearly reports.
     *
     * @return bool true if the purge was executed, false if it was skipped.
     */
    public function deleteReportData()
    {
        $settings = self::getPurgeDataSettings();

        // Make sure, data deletion is enabled
        // (loose comparison on purpose: stored option values are strings such as "0")
        if ($settings['delete_reports_enable'] == 0) {
            return false;
        }

        // make sure purging should run at this time
        if (!$this->shouldPurgeData($settings, self::OPTION_LAST_DELETE_PIWIK_REPORTS)) {
            return false;
        }

        // set last run time before purging, so a long running purge is not re-triggered
        Option::set(self::OPTION_LAST_DELETE_PIWIK_REPORTS, Date::factory('today')->getTimestamp());

        ReportsPurger::make($settings, self::getAllMetricsToKeep())->purgeData();

        return true;
    }

    /**
     * Deletes old log data based on the options set in the Deletelogs config
     * section. This is a scheduled task and will only execute every N days. The number
     * of days is determined by the delete_logs_schedule_lowest_interval config option.
     *
     * If delete_logs_enable is set to 1, old data in the log_visit, log_conversion,
     * log_conversion_item and log_link_visit_action tables is deleted. The following
     * options can tweak this behavior:
     * - delete_logs_older_than: The number of days after which log data is considered old.
     *
     * @ToDo: return number of Rows deleted in last run; Display age of "oldest" row to help the user setting
     *        the day offset;
     *
     * @return bool true if the purge was executed, false if it was skipped.
     */
    public function deleteLogData()
    {
        $settings = self::getPurgeDataSettings();

        // Make sure, data deletion is enabled
        // (loose comparison on purpose: stored option values are strings such as "0")
        if ($settings['delete_logs_enable'] == 0) {
            return false;
        }

        // make sure purging should run at this time
        if (!$this->shouldPurgeData($settings, self::OPTION_LAST_DELETE_PIWIK_LOGS)) {
            return false;
        }

        /*
         * Tell the DB that log deletion has run BEFORE deletion is executed;
         * If deletion / table optimization exceeds execution time, other tasks maybe prevented of being executed
         * every time, when the schedule is triggered.
         */
        $lastDeleteDate = Date::factory("today")->getTimestamp();
        Option::set(self::OPTION_LAST_DELETE_PIWIK_LOGS, $lastDeleteDate);

        // execute the purge
        LogDataPurger::make($settings)->purgeData();

        return true;
    }

    /**
     * Returns an array describing what data would be purged if both log data & report
     * purging is invoked.
     *
     * The returned array maps table names with the number of rows that will be deleted.
     * If the table name is mapped with -1, the table will be dropped.
     *
     * @param array $settings The config options to use in the estimate. If null, the real
     *                        options are used.
     * @return array
     */
    public static function getPurgeEstimate($settings = null)
    {
        if (is_null($settings)) {
            $settings = self::getPurgeDataSettings();
        }

        $result = array();

        if ($settings['delete_logs_enable']) {
            $logDataPurger = LogDataPurger::make($settings);
            $result = array_merge($result, $logDataPurger->getPurgeEstimate());
        }

        if ($settings['delete_reports_enable']) {
            $reportsPurger = ReportsPurger::make($settings, self::getAllMetricsToKeep());
            $result = array_merge($result, $reportsPurger->getPurgeEstimate());
        }

        return $result;
    }

    /**
     * Returns true if a report with the given year & month should be purged or not.
     *
     * If reportsOlderThan is set to null or not supplied, this function will check if
     * a report should be purged, based on existing configuration. In this case, if
     * delete_reports_enable is set to 0, this function will return false.
     *
     * @param int $reportDateYear The year of the report in question.
     * @param int $reportDateMonth The month of the report in question.
     * @param int|Date $reportsOlderThan If an int, the number of months a report must be older than
     *                                   in order to be purged. If a date, the date a report must be
     *                                   older than in order to be purged.
     * @return bool
     */
    public static function shouldReportBePurged($reportDateYear, $reportDateMonth, $reportsOlderThan = null)
    {
        // if no 'older than' value/date was supplied, use existing config
        if (is_null($reportsOlderThan)) {
            // if report deletion is not enabled, the report shouldn't be purged
            $settings = self::getPurgeDataSettings();
            if ($settings['delete_reports_enable'] == 0) {
                return false;
            }

            $reportsOlderThan = $settings['delete_reports_older_than'];
        }

        // if a integer was supplied, assume it is the number of months a report must be older than
        // (one extra month is subtracted so the current month is not counted)
        if (!($reportsOlderThan instanceof Date)) {
            $reportsOlderThan = Date::factory('today')->subMonth(1 + $reportsOlderThan);
        }

        return ReportsPurger::shouldReportBePurged(
            $reportDateYear, $reportDateMonth, $reportsOlderThan);
    }

    /**
     * Returns the general metrics to keep when the 'delete_reports_keep_basic_metrics'
     * config is set to 1.
     *
     * @return string[]
     */
    private static function getMetricsToKeep()
    {
        return array('nb_uniq_visitors', 'nb_visits', 'nb_actions', 'max_actions',
                     'sum_visit_length', 'bounce_count', 'nb_visits_converted', 'nb_conversions',
                     'revenue', 'quantity', 'price', 'orders');
    }

    /**
     * Returns the goal metrics to keep when the 'delete_reports_keep_basic_metrics'
     * config is set to 1.
     *
     * @return string[]
     */
    private static function getGoalMetricsToKeep()
    {
        // keep all goal metrics
        return array_values(Metrics::$mappingFromIdToNameGoal);
    }

    /**
     * Returns the names of metrics that should be kept when purging as they appear in
     * archive tables.
     *
     * @return string[]
     */
    public static function getAllMetricsToKeep()
    {
        $metricsToKeep = self::getMetricsToKeep();

        // convert goal metric names to correct archive names
        if (Common::isGoalPluginEnabled()) {
            $goalMetricsToKeep = self::getGoalMetricsToKeep();

            $maxGoalId = self::getMaxGoalId();

            // for each goal metric, there's a different name for each goal, including the overview,
            // the order report & cart report
            foreach ($goalMetricsToKeep as $metric) {
                // maxGoalId can be 0, in which case no per-goal names are added
                for ($i = 1; $i <= $maxGoalId; ++$i) {
                    $metricsToKeep[] = Archiver::getRecordName($metric, $i);
                }

                $metricsToKeep[] = Archiver::getRecordName($metric);
                $metricsToKeep[] = Archiver::getRecordName($metric, GoalManager::IDGOAL_ORDER);
                $metricsToKeep[] = Archiver::getRecordName($metric, GoalManager::IDGOAL_CART);
            }
        }

        return $metricsToKeep;
    }

    /**
     * Returns true if one of the purge data tasks should run now, false if it shouldn't.
     *
     * Note that both purge tasks share the same "initial run" flag and the same
     * interval setting (delete_logs_schedule_lowest_interval).
     *
     * @param array $settings The purge settings, see getPurgeDataSettings().
     * @param string $lastRanOption Name of the option holding the task's last run timestamp.
     * @return bool
     */
    private function shouldPurgeData($settings, $lastRanOption)
    {
        // Log deletion may not run until it is once rescheduled (initial run). This is the
        // only way to guarantee the calculated next scheduled deletion time.
        $initialDelete = Option::get(self::OPTION_LAST_DELETE_PIWIK_LOGS_INITIAL);
        if (empty($initialDelete)) {
            Option::set(self::OPTION_LAST_DELETE_PIWIK_LOGS_INITIAL, 1);
            return false;
        }

        // Make sure, log purging is allowed to run now
        $lastDelete = Option::get($lastRanOption);
        if ($lastDelete === false) {
            // never ran before
            return true;
        }

        $deleteIntervalDays = $settings['delete_logs_schedule_lowest_interval'];
        $deleteIntervalSeconds = $this->getDeleteIntervalInSeconds($deleteIntervalDays);

        // run only once the configured interval has elapsed since the last run
        return ((int)$lastDelete + $deleteIntervalSeconds) <= time();
    }

    /**
     * Converts an interval given in days to seconds.
     *
     * @param int|string $deleteInterval Number of days; cast to int before converting.
     * @return int
     */
    public function getDeleteIntervalInSeconds($deleteInterval)
    {
        return (int)$deleteInterval * 24 * 60 * 60;
    }

    /**
     * Returns the highest goal ID found in the goal table.
     *
     * @return int the result of MAX(idgoal) cast to int, so an empty result becomes 0
     */
    private static function getMaxGoalId()
    {
        return (int)Db::fetchOne("SELECT MAX(idgoal) FROM " . Common::prefixTable('goal'));
    }
}