update Piwik to version 2.16 (fixes #91)

This commit is contained in:
oliver 2016-04-10 18:55:57 +02:00
commit d885a4baa9
5833 changed files with 418860 additions and 226988 deletions

View file

@ -0,0 +1,34 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\DataAccess;
use Piwik\Db;
use Piwik\Common;
/**
* Data Access Object for operations dealing with the log_action table.
*/
class Actions
{
    /**
     * Removes a list of actions from the log_action table by ID.
     *
     * Every ID is cast to int before being inlined into the SQL, so the
     * generated IN (...) list cannot carry injected SQL.
     *
     * @param int[] $idActions IDs of log_action rows to delete.
     */
    public function delete($idActions)
    {
        // Cast without a by-reference foreach: the reference-based loop left a
        // dangling $id reference alive after the loop (a classic PHP footgun).
        $idActions = array_map('intval', $idActions);

        // An empty ID list would produce invalid SQL ("... IN ()"), so bail out early.
        if (empty($idActions)) {
            return;
        }

        $table = Common::prefixTable('log_action');

        $sql = "DELETE FROM $table WHERE idaction IN (" . implode(",", $idActions) . ")";

        Db::query($sql);
    }
}

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -9,18 +9,16 @@
namespace Piwik\DataAccess;
use Exception;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Archive;
use Piwik\Archive\Chunk;
use Piwik\ArchiveProcessor;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Common;
use Piwik\Date;
use Piwik\Db;
use Piwik\Log;
use Piwik\Period;
use Piwik\Period\Range;
use Piwik\Piwik;
use Piwik\Segment;
use Piwik\Site;
/**
* Data Access object used to query archives
@ -42,52 +40,46 @@ class ArchiveSelector
const NB_VISITS_CONVERTED_RECORD_LOOKED_UP = "nb_visits_converted";
static public function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params, $minDatetimeArchiveProcessedUTC)
private static function getModel()
{
$dateStart = $params->getPeriod()->getDateStart();
$bindSQL = array($params->getSite()->getId(),
$dateStart->toString('Y-m-d'),
$params->getPeriod()->getDateEnd()->toString('Y-m-d'),
$params->getPeriod()->getId(),
);
return new Model();
}
$timeStampWhere = '';
public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params, $minDatetimeArchiveProcessedUTC)
{
$idSite = $params->getSite()->getId();
$period = $params->getPeriod()->getId();
$dateStart = $params->getPeriod()->getDateStart();
$dateStartIso = $dateStart->toString('Y-m-d');
$dateEndIso = $params->getPeriod()->getDateEnd()->toString('Y-m-d');
$numericTable = ArchiveTableCreator::getNumericTable($dateStart);
$minDatetimeIsoArchiveProcessedUTC = null;
if ($minDatetimeArchiveProcessedUTC) {
$timeStampWhere = " AND ts_archived >= ? ";
$bindSQL[] = Date::factory($minDatetimeArchiveProcessedUTC)->getDatetime();
$minDatetimeIsoArchiveProcessedUTC = Date::factory($minDatetimeArchiveProcessedUTC)->getDatetime();
}
$requestedPlugin = $params->getRequestedPlugin();
$segment = $params->getSegment();
$isSkipAggregationOfSubTables = $params->isSkipAggregationOfSubTables();
$segment = $params->getSegment();
$plugins = array("VisitsSummary", $requestedPlugin);
$sqlWhereArchiveName = self::getNameCondition($plugins, $segment, $isSkipAggregationOfSubTables);
$sqlQuery = " SELECT idarchive, value, name, date1 as startDate
FROM " . ArchiveTableCreator::getNumericTable($dateStart) . "``
WHERE idsite = ?
AND date1 = ?
AND date2 = ?
AND period = ?
AND ( ($sqlWhereArchiveName)
OR name = '" . self::NB_VISITS_RECORD_LOOKED_UP . "'
OR name = '" . self::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "')
$timeStampWhere
ORDER BY idarchive DESC";
$results = Db::fetchAll($sqlQuery, $bindSQL);
$doneFlags = Rules::getDoneFlags($plugins, $segment);
$doneFlagValues = Rules::getSelectableDoneFlagValues();
$results = self::getModel()->getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues);
if (empty($results)) {
return false;
}
$idArchive = self::getMostRecentIdArchiveFromResults($segment, $requestedPlugin, $isSkipAggregationOfSubTables, $results);
$idArchiveVisitsSummary = self::getMostRecentIdArchiveFromResults($segment, "VisitsSummary", $isSkipAggregationOfSubTables, $results);
$idArchive = self::getMostRecentIdArchiveFromResults($segment, $requestedPlugin, $results);
$idArchiveVisitsSummary = self::getMostRecentIdArchiveFromResults($segment, "VisitsSummary", $results);
list($visits, $visitsConverted) = self::getVisitsMetricsFromResults($idArchive, $idArchiveVisitsSummary, $results);
if ($visits === false
&& $idArchive === false
) {
if (false === $visits && false === $idArchive) {
return false;
}
@ -98,9 +90,11 @@ class ArchiveSelector
{
$visits = $visitsConverted = false;
$archiveWithVisitsMetricsWasFound = ($idArchiveVisitsSummary !== false);
if ($archiveWithVisitsMetricsWasFound) {
$visits = $visitsConverted = 0;
}
foreach ($results as $result) {
if (in_array($result['idarchive'], array($idArchive, $idArchiveVisitsSummary))) {
$value = (int)$result['value'];
@ -116,13 +110,15 @@ class ArchiveSelector
}
}
}
return array($visits, $visitsConverted);
}
protected static function getMostRecentIdArchiveFromResults(Segment $segment, $requestedPlugin, $isSkipAggregationOfSubTables, $results)
protected static function getMostRecentIdArchiveFromResults(Segment $segment, $requestedPlugin, $results)
{
$idArchive = false;
$namesRequestedPlugin = Rules::getDoneFlags(array($requestedPlugin), $segment, $isSkipAggregationOfSubTables);
$namesRequestedPlugin = Rules::getDoneFlags(array($requestedPlugin), $segment);
foreach ($results as $result) {
if ($idArchive === false
&& in_array($result['name'], $namesRequestedPlugin)
@ -131,6 +127,7 @@ class ArchiveSelector
break;
}
}
return $idArchive;
}
@ -141,21 +138,29 @@ class ArchiveSelector
* @param array $periods
* @param Segment $segment
* @param array $plugins List of plugin names for which data is being requested.
* @param bool $isSkipAggregationOfSubTables Whether we are selecting an archive that may be partial (no sub-tables)
* @return array Archive IDs are grouped by archive name and period range, ie,
* array(
* 'VisitsSummary.done' => array(
* '2010-01-01' => array(1,2,3)
* )
* )
* @throws
*/
static public function getArchiveIds($siteIds, $periods, $segment, $plugins, $isSkipAggregationOfSubTables = false)
public static function getArchiveIds($siteIds, $periods, $segment, $plugins)
{
if (empty($siteIds)) {
throw new \Exception("Website IDs could not be read from the request, ie. idSite=");
}
foreach ($siteIds as $index => $siteId) {
$siteIds[$index] = (int) $siteId;
}
$getArchiveIdsSql = "SELECT idsite, name, date1, date2, MAX(idarchive) as idarchive
FROM %s
WHERE %s
AND " . self::getNameCondition($plugins, $segment, $isSkipAggregationOfSubTables) . "
AND idsite IN (" . implode(',', $siteIds) . ")
WHERE idsite IN (" . implode(',', $siteIds) . ")
AND " . self::getNameCondition($plugins, $segment) . "
AND %s
GROUP BY idsite, date1, date2";
$monthToPeriods = array();
@ -197,14 +202,14 @@ class ArchiveSelector
$sql = sprintf($getArchiveIdsSql, $table, $dateCondition);
$archiveIds = Db::fetchAll($sql, $bind);
// get the archive IDs
foreach (Db::fetchAll($sql, $bind) as $row) {
$archiveName = $row['name'];
foreach ($archiveIds as $row) {
//FIXMEA duplicate with Archive.php
$dateStr = $row['date1'] . "," . $row['date2'];
$dateStr = $row['date1'] . ',' . $row['date2'];
$result[$archiveName][$dateStr][] = $row['idarchive'];
$result[$row['name']][$dateStr][] = $row['idarchive'];
}
}
@ -215,29 +220,52 @@ class ArchiveSelector
* Queries and returns archive data using a set of archive IDs.
*
* @param array $archiveIds The IDs of the archives to get data from.
* @param array $recordNames The names of the data to retrieve (ie, nb_visits, nb_actions, etc.)
* @param array $recordNames The names of the data to retrieve (ie, nb_visits, nb_actions, etc.).
* Note: You CANNOT pass multiple recordnames if $loadAllSubtables=true.
* @param string $archiveDataType The archive data type (either, 'blob' or 'numeric').
* @param bool $loadAllSubtables Whether to pre-load all subtables
* @param int|null|string $idSubtable null if the root blob should be loaded, an integer if a subtable should be
* loaded and 'all' if all subtables should be loaded.
* @throws Exception
* @return array
*/
static public function getArchiveData($archiveIds, $recordNames, $archiveDataType, $loadAllSubtables)
public static function getArchiveData($archiveIds, $recordNames, $archiveDataType, $idSubtable)
{
$chunk = new Chunk();
// create the SQL to select archive data
$inNames = Common::getSqlStringFieldsArray($recordNames);
$loadAllSubtables = $idSubtable == Archive::ID_SUBTABLE_LOAD_ALL_SUBTABLES;
if ($loadAllSubtables) {
$name = reset($recordNames);
// select blobs w/ name like "$name_[0-9]+" w/o using RLIKE
$nameEnd = strlen($name) + 2;
$whereNameIs = "(name = ?
OR (name LIKE ?
AND SUBSTRING(name, $nameEnd, 1) >= '0'
AND SUBSTRING(name, $nameEnd, 1) <= '9') )";
$nameEnd = strlen($name) + 1;
$nameEndAppendix = $nameEnd + 1;
$appendix = $chunk->getAppendix();
$lenAppendix = strlen($appendix);
$checkForChunkBlob = "SUBSTRING(name, $nameEnd, $lenAppendix) = '$appendix'";
$checkForSubtableId = "(SUBSTRING(name, $nameEndAppendix, 1) >= '0'
AND SUBSTRING(name, $nameEndAppendix, 1) <= '9')";
$whereNameIs = "(name = ? OR (name LIKE ? AND ( $checkForChunkBlob OR $checkForSubtableId ) ))";
$bind = array($name, $name . '%');
} else {
if ($idSubtable === null) {
// select root table or specific record names
$bind = array_values($recordNames);
} else {
// select a subtable id
$bind = array();
foreach ($recordNames as $recordName) {
// to be backwards compatible we need to look for the exact idSubtable blob and for the chunk
// that stores the subtables (a chunk stores many blobs in one blob)
$bind[] = $chunk->getRecordNameForTableId($recordName, $idSubtable);
$bind[] = self::appendIdSubtable($recordName, $idSubtable);
}
}
$inNames = Common::getSqlStringFieldsArray($bind);
$whereNameIs = "name IN ($inNames)";
$bind = array_values($recordNames);
}
$getValuesSql = "SELECT value, name, idsite, date1, date2, ts_archived
@ -251,110 +279,91 @@ class ArchiveSelector
if (empty($ids)) {
throw new Exception("Unexpected: id archive not found for period '$period' '");
}
// $period = "2009-01-04,2009-01-04",
$date = Date::factory(substr($period, 0, 10));
if ($archiveDataType == 'numeric') {
$isNumeric = $archiveDataType == 'numeric';
if ($isNumeric) {
$table = ArchiveTableCreator::getNumericTable($date);
} else {
$table = ArchiveTableCreator::getBlobTable($date);
}
$sql = sprintf($getValuesSql, $table, implode(',', $ids));
$sql = sprintf($getValuesSql, $table, implode(',', $ids));
$dataRows = Db::fetchAll($sql, $bind);
foreach ($dataRows as $row) {
$rows[] = $row;
if ($isNumeric) {
$rows[] = $row;
} else {
$row['value'] = self::uncompress($row['value']);
if ($chunk->isRecordNameAChunk($row['name'])) {
self::moveChunkRowToRows($rows, $row, $chunk, $loadAllSubtables, $idSubtable);
} else {
$rows[] = $row;
}
}
}
}
return $rows;
}
private static function moveChunkRowToRows(&$rows, $row, Chunk $chunk, $loadAllSubtables, $idSubtable)
{
// $blobs = array([subtableID] = [blob of subtableId])
$blobs = unserialize($row['value']);
if (!is_array($blobs)) {
return;
}
// $rawName = eg 'PluginName_ArchiveName'
$rawName = $chunk->getRecordNameWithoutChunkAppendix($row['name']);
if ($loadAllSubtables) {
foreach ($blobs as $subtableId => $blob) {
$row['value'] = $blob;
$row['name'] = self::appendIdSubtable($rawName, $subtableId);
$rows[] = $row;
}
} elseif (array_key_exists($idSubtable, $blobs)) {
$row['value'] = $blobs[$idSubtable];
$row['name'] = self::appendIdSubtable($rawName, $idSubtable);
$rows[] = $row;
}
}
public static function appendIdSubtable($recordName, $id)
{
return $recordName . "_" . $id;
}
private static function uncompress($data)
{
return @gzuncompress($data);
}
/**
* Returns the SQL condition used to find successfully completed archives that
* this instance is querying for.
*
* @param array $plugins
* @param Segment $segment
* @param bool $isSkipAggregationOfSubTables
* @return string
*/
static private function getNameCondition(array $plugins, Segment $segment, $isSkipAggregationOfSubTables)
private static function getNameCondition(array $plugins, Segment $segment)
{
// the flags used to tell how the archiving process for a specific archive was completed,
// if it was completed
$doneFlags = Rules::getDoneFlags($plugins, $segment, $isSkipAggregationOfSubTables);
$doneFlags = Rules::getDoneFlags($plugins, $segment);
$allDoneFlags = "'" . implode("','", $doneFlags) . "'";
$possibleValues = Rules::getSelectableDoneFlagValues();
// create the SQL to find archives that are DONE
return "((name IN ($allDoneFlags)) AND " .
" (value = '" . ArchiveWriter::DONE_OK . "' OR " .
" value = '" . ArchiveWriter::DONE_OK_TEMPORARY . "'))";
}
static public function purgeOutdatedArchives(Date $dateStart)
{
$purgeArchivesOlderThan = Rules::shouldPurgeOutdatedArchives($dateStart);
if (!$purgeArchivesOlderThan) {
return;
}
$idArchivesToDelete = self::getTemporaryArchiveIdsOlderThan($dateStart, $purgeArchivesOlderThan);
if (!empty($idArchivesToDelete)) {
self::deleteArchiveIds($dateStart, $idArchivesToDelete);
}
self::deleteArchivesWithPeriodRange($dateStart);
Log::debug("Purging temporary archives: done [ purged archives older than %s in %s ] [Deleted IDs: %s]",
$purgeArchivesOlderThan, $dateStart->toString("Y-m"), implode(',', $idArchivesToDelete));
}
/*
* Deleting "Custom Date Range" reports after 1 day, since they can be re-processed and would take up un-necessary space
*/
protected static function deleteArchivesWithPeriodRange(Date $date)
{
$query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?";
$yesterday = Date::factory('yesterday')->getDateTime();
$bind = array(Piwik::$idPeriods['range'], $yesterday);
$numericTable = ArchiveTableCreator::getNumericTable($date);
Db::query(sprintf($query, $numericTable), $bind);
Log::debug("Purging Custom Range archives: done [ purged archives older than %s from %s / blob ]", $yesterday, $numericTable);
try {
Db::query(sprintf($query, ArchiveTableCreator::getBlobTable($date)), $bind);
} catch (Exception $e) {
// Individual blob tables could be missing
}
}
protected static function deleteArchiveIds(Date $date, $idArchivesToDelete)
{
$query = "DELETE FROM %s WHERE idarchive IN (" . implode(',', $idArchivesToDelete) . ")";
Db::query(sprintf($query, ArchiveTableCreator::getNumericTable($date)));
try {
Db::query(sprintf($query, ArchiveTableCreator::getBlobTable($date)));
} catch (Exception $e) {
// Individual blob tables could be missing
}
}
protected static function getTemporaryArchiveIdsOlderThan(Date $date, $purgeArchivesOlderThan)
{
$query = "SELECT idarchive
FROM " . ArchiveTableCreator::getNumericTable($date) . "
WHERE name LIKE 'done%'
AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . "
AND ts_archived < ?)
OR value = " . ArchiveWriter::DONE_ERROR . ")";
$result = Db::fetchAll($query, array($purgeArchivesOlderThan));
$idArchivesToDelete = array();
if (!empty($result)) {
foreach ($result as $row) {
$idArchivesToDelete[] = $row['idarchive'];
}
}
return $idArchivesToDelete;
return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
}
}

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -9,70 +9,61 @@
namespace Piwik\DataAccess;
use Exception;
use Piwik\Common;
use Piwik\Date;
use Piwik\Db;
use Piwik\DbHelper;
class ArchiveTableCreator
{
const NUMERIC_TABLE = "numeric";
const BLOB_TABLE = "blob";
const BLOB_TABLE = "blob";
public static $tablesAlreadyInstalled = null;
static public $tablesAlreadyInstalled = null;
static public function getNumericTable(Date $date)
public static function getNumericTable(Date $date)
{
return self::getTable($date, self::NUMERIC_TABLE);
}
static public function getBlobTable(Date $date)
public static function getBlobTable(Date $date)
{
return self::getTable($date, self::BLOB_TABLE);
}
static protected function getTable(Date $date, $type)
protected static function getTable(Date $date, $type)
{
$tableNamePrefix = "archive_" . $type;
$tableName = $tableNamePrefix . "_" . $date->toString('Y_m');
$tableName = $tableNamePrefix . "_" . self::getTableMonthFromDate($date);
$tableName = Common::prefixTable($tableName);
self::createArchiveTablesIfAbsent($tableName, $tableNamePrefix);
return $tableName;
}
static protected function createArchiveTablesIfAbsent($tableName, $tableNamePrefix)
protected static function createArchiveTablesIfAbsent($tableName, $tableNamePrefix)
{
if (is_null(self::$tablesAlreadyInstalled)) {
self::refreshTableList();
}
if (!in_array($tableName, self::$tablesAlreadyInstalled)) {
$db = Db::get();
$sql = DbHelper::getTableCreateSql($tableNamePrefix);
// replace table name template by real name
$tableNamePrefix = Common::prefixTable($tableNamePrefix);
$sql = str_replace($tableNamePrefix, $tableName, $sql);
try {
$db->query($sql);
} catch (Exception $e) {
// accept mysql error 1050: table already exists, throw otherwise
if (!$db->isErrNo($e, '1050')) {
throw $e;
}
}
self::getModel()->createArchiveTable($tableName, $tableNamePrefix);
self::$tablesAlreadyInstalled[] = $tableName;
}
}
static public function clear()
private static function getModel()
{
return new Model();
}
public static function clear()
{
self::$tablesAlreadyInstalled = null;
}
static public function refreshTableList($forceReload = false)
public static function refreshTableList($forceReload = false)
{
self::$tablesAlreadyInstalled = DbHelper::getTablesInstalled($forceReload);
}
@ -80,40 +71,53 @@ class ArchiveTableCreator
/**
* Returns all table names archive_*
*
* @param string $type The type of table to return. Either `self::NUMERIC_TABLE` or `self::BLOB_TABLE`.
* @return array
*/
static public function getTablesArchivesInstalled()
public static function getTablesArchivesInstalled($type = null)
{
if (is_null(self::$tablesAlreadyInstalled)) {
self::refreshTableList();
}
if (empty($type)) {
$tableMatchRegex = '/archive_(numeric|blob)_/';
} else {
$tableMatchRegex = '/archive_' . preg_quote($type) . '_/';
}
$archiveTables = array();
foreach (self::$tablesAlreadyInstalled as $table) {
if (strpos($table, 'archive_numeric_') !== false
|| strpos($table, 'archive_blob_') !== false
) {
if (preg_match($tableMatchRegex, $table)) {
$archiveTables[] = $table;
}
}
return $archiveTables;
}
static public function getDateFromTableName($tableName)
public static function getDateFromTableName($tableName)
{
$tableName = Common::unprefixTable($tableName);
$date = str_replace(array('archive_numeric_', 'archive_blob_'), '', $tableName);
$date = str_replace(array('archive_numeric_', 'archive_blob_'), '', $tableName);
return $date;
}
static public function getTypeFromTableName($tableName)
public static function getTableMonthFromDate(Date $date)
{
return $date->toString('Y_m');
}
public static function getTypeFromTableName($tableName)
{
if (strpos($tableName, 'archive_numeric_') !== false) {
return self::NUMERIC_TABLE;
}
if (strpos($tableName, 'archive_blob_') !== false) {
return self::BLOB_TABLE;
}
return false;
}
}

View file

@ -0,0 +1,89 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\DataAccess;
use Piwik\Common;
use Piwik\Config;
use Piwik\Db;
/**
* Data Access class for querying numeric & blob archive tables.
*/
class ArchiveTableDao
{
    /**
     * Analyzes numeric & blob tables for a single table date (ie, `'2015_01'`) and returns
     * statistics including:
     *
     * - number of archives present
     * - number of invalidated archives
     * - number of temporary archives
     * - number of error archives
     * - number of segment archives
     * - number of numeric rows
     * - number of blob rows
     *
     * @param string $tableDate ie `'2015_01'`
     * @return array Rows indexed by `"idsite.date1.date2.period"` labels.
     */
    public function getArchiveTableAnalysis($tableDate)
    {
        // Placeholder used when a (site, period) combination exists only in the blob
        // table, so the numeric-side columns have nothing to report for that label.
        $numericQueryEmptyRow = array(
            'count_archives'             => '-',
            'count_invalidated_archives' => '-',
            'count_temporary_archives'   => '-',
            'count_error_archives'       => '-',
            'count_segment_archives'     => '-',
            'count_numeric_rows'         => '-',
        );

        $tableDate = str_replace("`", "", $tableDate); // for sanity

        $numericTable = Common::prefixTable("archive_numeric_$tableDate");
        $blobTable    = Common::prefixTable("archive_blob_$tableDate");

        // query numeric table
        // Segment archives are detected by name length: a plain done flag is at most
        // 32 chars ("done" + md5 hash suffix), so anything longer carries a segment hash.
        $sql = "SELECT CONCAT_WS('.', idsite, date1, date2, period) AS label,
                       SUM(CASE WHEN name LIKE 'done%' THEN 1 ELSE 0 END) AS count_archives,
                       SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_invalidated_archives,
                       SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_temporary_archives,
                       SUM(CASE WHEN name LIKE 'done%' AND value = ? THEN 1 ELSE 0 END) AS count_error_archives,
                       SUM(CASE WHEN name LIKE 'done%' AND CHAR_LENGTH(name) > 32 THEN 1 ELSE 0 END) AS count_segment_archives,
                       SUM(CASE WHEN name NOT LIKE 'done%' THEN 1 ELSE 0 END) AS count_numeric_rows,
                       0 AS count_blob_rows
                  FROM `$numericTable`
              GROUP BY idsite, date1, date2, period";

        $rows = Db::fetchAll($sql, array(ArchiveWriter::DONE_INVALIDATED, ArchiveWriter::DONE_OK_TEMPORARY,
                                         ArchiveWriter::DONE_ERROR));

        // index result
        $result = array();
        foreach ($rows as $row) {
            $result[$row['label']] = $row;
        }

        // query blob table & manually merge results (no FULL OUTER JOIN in mysql)
        // NOTE: grouping must match the numeric query's grouping (idsite, date1, date2,
        // period); the previous version grouped by date1 twice, which collapsed rows
        // that differ only in date2 and attached blob counts to the wrong label.
        $sql = "SELECT CONCAT_WS('.', idsite, date1, date2, period) AS label,
                       COUNT(*) AS count_blob_rows
                  FROM `$blobTable`
              GROUP BY idsite, date1, date2, period";
        foreach (Db::fetchAll($sql) as $blobStatsRow) {
            $label = $blobStatsRow['label'];
            if (isset($result[$label])) {
                $result[$label] = array_merge($result[$label], $blobStatsRow);
            } else {
                $result[$label] = $blobStatsRow + $numericQueryEmptyRow;
            }
        }

        return $result;
    }
}

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -9,17 +9,13 @@
namespace Piwik\DataAccess;
use Exception;
use Piwik\Archive;
use Piwik\Archive\Chunk;
use Piwik\ArchiveProcessor\Rules;
use Piwik\ArchiveProcessor;
use Piwik\Common;
use Piwik\Config;
use Piwik\Db;
use Piwik\Db\BatchInsert;
use Piwik\Log;
use Piwik\Period;
use Piwik\Segment;
use Piwik\SettingsPiwik;
/**
* This class is used to create a new Archive.
@ -28,7 +24,6 @@ use Piwik\SettingsPiwik;
*/
class ArchiveWriter
{
const PREFIX_SQL_LOCK = "locked_";
/**
* Flag stored at the end of the archiving
*
@ -50,23 +45,31 @@ class ArchiveWriter
*/
const DONE_OK_TEMPORARY = 3;
/**
* Flag indicated that archive is done but was marked as invalid later and needs to be re-processed during next archiving process
*
* @var int
*/
const DONE_INVALIDATED = 4;
protected $fields = array('idarchive',
'idsite',
'date1',
'date2',
'period',
'ts_archived',
'name',
'value');
'idsite',
'date1',
'date2',
'period',
'ts_archived',
'name',
'value');
public function __construct(ArchiveProcessor\Parameters $params, $isArchiveTemporary)
{
$this->idArchive = false;
$this->idSite = $params->getSite()->getId();
$this->segment = $params->getSegment();
$this->period = $params->getPeriod();
$this->idSite = $params->getSite()->getId();
$this->segment = $params->getSegment();
$this->period = $params->getPeriod();
$idSites = array($this->idSite);
$this->doneFlag = Rules::getDoneStringFlagFor($idSites, $this->segment, $this->period->getLabel(), $params->getRequestedPlugin(), $params->isSkipAggregationOfSubTables());
$this->doneFlag = Rules::getDoneStringFlagFor($idSites, $this->segment, $this->period->getLabel(), $params->getRequestedPlugin());
$this->isArchiveTemporary = $isArchiveTemporary;
$this->dateStart = $this->period->getDateStart();
@ -74,25 +77,32 @@ class ArchiveWriter
/**
* @param string $name
* @param string[] $values
* @param string|string[] $values A blob string or an array of blob strings. If an array
* is used, the first element in the array will be inserted
* with the `$name` name. The others will be splitted into chunks. All subtables
* within one chunk will be serialized as an array where the index is the
* subtableId.
*/
public function insertBlobRecord($name, $values)
{
if (is_array($values)) {
$clean = array();
foreach ($values as $id => $value) {
// for the parent Table we keep the name
// for example for the Table of searchEngines we keep the name 'referrer_search_engine'
// but for the child table of 'Google' which has the ID = 9 the name would be 'referrer_search_engine_9'
$newName = $name;
if ($id != 0) {
//FIXMEA: refactor
$newName = $name . '_' . $id;
}
$value = $this->compress($value);
$clean[] = array($newName, $value);
if (isset($values[0])) {
// we always store the root table in a single blob for fast access
$clean[] = array($name, $this->compress($values[0]));
unset($values[0]);
}
if (!empty($values)) {
// we move all subtables into chunks
$chunk = new Chunk();
$chunks = $chunk->moveArchiveBlobsIntoChunks($name, $values);
foreach ($chunks as $index => $subtables) {
$clean[] = array($index, $this->compress(serialize($subtables)));
}
}
$this->insertBulkRecords($clean);
return;
}
@ -106,6 +116,7 @@ class ArchiveWriter
if ($this->idArchive === false) {
throw new Exception("Must call allocateNewArchiveId() first");
}
return $this->idArchive;
}
@ -117,108 +128,49 @@ class ArchiveWriter
public function finalizeArchive()
{
$this->deletePreviousArchiveStatus();
$numericTable = $this->getTableNumeric();
$idArchive = $this->getIdArchive();
$this->getModel()->deletePreviousArchiveStatus($numericTable, $idArchive, $this->doneFlag);
$this->logArchiveStatusAsFinal();
}
static protected function compress($data)
protected function compress($data)
{
if (Db::get()->hasBlobDataType()) {
return gzcompress($data);
}
return $data;
}
protected function getArchiveLockName()
{
$numericTable = $this->getTableNumeric();
$dbLockName = "allocateNewArchiveId.$numericTable";
return $dbLockName;
}
protected function acquireArchiveTableLock()
{
$dbLockName = $this->getArchiveLockName();
if (Db::getDbLock($dbLockName, $maxRetries = 30) === false) {
throw new Exception("allocateNewArchiveId: Cannot get named lock $dbLockName.");
}
}
protected function releaseArchiveTableLock()
{
$dbLockName = $this->getArchiveLockName();
Db::releaseDbLock($dbLockName);
}
protected function allocateNewArchiveId()
{
$this->idArchive = $this->insertNewArchiveId();
$numericTable = $this->getTableNumeric();
$this->idArchive = $this->getModel()->allocateNewArchiveId($numericTable);
return $this->idArchive;
}
/**
* Locks the archive table to generate a new archive ID.
*
* We lock to make sure that
* if several archiving processes are running at the same time (for different websites and/or periods)
* then they will each use a unique archive ID.
*
* @return int
*/
protected function insertNewArchiveId()
private function getModel()
{
$numericTable = $this->getTableNumeric();
$idSite = $this->idSite;
$this->acquireArchiveTableLock();
$locked = self::PREFIX_SQL_LOCK . Common::generateUniqId();
$date = date("Y-m-d H:i:s");
$insertSql = "INSERT INTO $numericTable "
. " SELECT IFNULL( MAX(idarchive), 0 ) + 1,
'" . $locked . "',
" . (int)$idSite . ",
'" . $date . "',
'" . $date . "',
0,
'" . $date . "',
0 "
. " FROM $numericTable as tb1";
Db::get()->exec($insertSql);
$this->releaseArchiveTableLock();
$selectIdSql = "SELECT idarchive FROM $numericTable WHERE name = ? LIMIT 1";
$id = Db::get()->fetchOne($selectIdSql, $locked);
return $id;
return new Model();
}
protected function logArchiveStatusAsIncomplete()
{
$statusWhileProcessing = self::DONE_ERROR;
$this->insertRecord($this->doneFlag, $statusWhileProcessing);
}
protected function deletePreviousArchiveStatus()
{
// without advisory lock here, the DELETE would acquire Exclusive Lock
$this->acquireArchiveTableLock();
Db::query("DELETE FROM " . $this->getTableNumeric() . "
WHERE idarchive = ? AND (name = '" . $this->doneFlag
. "' OR name LIKE '" . self::PREFIX_SQL_LOCK . "%')",
array($this->getIdArchive())
);
$this->releaseArchiveTableLock();
$this->insertRecord($this->doneFlag, self::DONE_ERROR);
}
protected function logArchiveStatusAsFinal()
{
$status = self::DONE_OK;
if ($this->isArchiveTemporary) {
$status = self::DONE_OK_TEMPORARY;
}
$this->insertRecord($this->doneFlag, $status);
}
@ -231,27 +183,37 @@ class ArchiveWriter
foreach ($records as $record) {
$this->insertRecord($record[0], $record[1]);
}
return true;
}
$bindSql = $this->getInsertRecordBind();
$values = array();
$values = array();
$valueSeen = false;
foreach ($records as $record) {
// don't record zero
if (empty($record[1])) continue;
if (empty($record[1])) {
continue;
}
$bind = $bindSql;
$bind[] = $record[0]; // name
$bind[] = $record[1]; // value
$bind = $bindSql;
$bind[] = $record[0]; // name
$bind[] = $record[1]; // value
$values[] = $bind;
$valueSeen = $record[1];
}
if (empty($values)) return true;
if (empty($values)) {
return true;
}
$tableName = $this->getTableNameToInsert($valueSeen);
BatchInsert::tableInsertBatch($tableName, $this->getInsertFields(), $values);
$fields = $this->getInsertFields();
BatchInsert::tableInsertBatch($tableName, $fields, $values, $throwException = false, $charset = 'latin1');
return true;
}
@ -270,26 +232,22 @@ class ArchiveWriter
}
$tableName = $this->getTableNameToInsert($value);
$fields = $this->getInsertFields();
$record = $this->getInsertRecordBind();
$this->getModel()->insertRecord($tableName, $fields, $record, $name, $value);
// duplicate idarchives are Ignored, see http://dev.piwik.org/trac/ticket/987
$query = "INSERT IGNORE INTO " . $tableName . "
(" . implode(", ", $this->getInsertFields()) . ")
VALUES (?,?,?,?,?,?,?,?)";
$bindSql = $this->getInsertRecordBind();
$bindSql[] = $name;
$bindSql[] = $value;
Db::query($query, $bindSql);
return true;
}
protected function getInsertRecordBind()
{
return array($this->getIdArchive(),
$this->idSite,
$this->dateStart->toString('Y-m-d'),
$this->period->getDateEnd()->toString('Y-m-d'),
$this->period->getId(),
date("Y-m-d H:i:s"));
$this->idSite,
$this->dateStart->toString('Y-m-d'),
$this->period->getDateEnd()->toString('Y-m-d'),
$this->period->getId(),
date("Y-m-d H:i:s"));
}
protected function getTableNameToInsert($value)
@ -297,6 +255,7 @@ class ArchiveWriter
if (is_numeric($value)) {
return $this->getTableNumeric();
}
return ArchiveTableCreator::getBlobTable($this->dateStart);
}

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -8,55 +8,53 @@
*/
namespace Piwik\DataAccess;
use PDOStatement;
use Piwik\ArchiveProcessor\Parameters;
use Piwik\Common;
use Piwik\DataArray;
use Piwik\Db;
use Piwik\Metrics;
use Piwik\Tracker\GoalManager;
/**
* Contains methods that calculate metrics by aggregating log data (visits, actions, conversions,
* ecommerce items).
*
*
* You can use the methods in this class within {@link Piwik\Plugin\Archiver Archiver} descendants
* to aggregate log data without having to write SQL queries.
*
*
* ### Aggregation Dimension
*
*
* All aggregation methods accept a **dimension** parameter. These parameters are important as
* they control how rows in a table are aggregated together.
*
*
* A **_dimension_** is just a table column. Rows that have the same values for these columns are
* aggregated together. The result of these aggregations is a set of metrics for every recorded value
* of a **dimension**.
*
*
* _Note: A dimension is essentially the same as a **GROUP BY** field._
*
*
* ### Examples
*
*
* **Aggregating visit data**
*
*
* $archiveProcessor = // ...
* $logAggregator = $archiveProcessor->getLogAggregator();
*
*
* // get metrics for every used browser language of all visits by returning visitors
* $query = $logAggregator->queryVisitsByDimension(
* $dimensions = array('log_visit.location_browser_lang'),
* $where = 'log_visit.visitor_returning = 1',
*
*
* // also count visits for each browser language that are not located in the US
* $additionalSelects = array('sum(case when log_visit.location_country <> 'us' then 1 else 0 end) as nonus'),
*
*
* // we're only interested in visits, unique visitors & actions, so don't waste time calculating anything else
* $metrics = array(Metrics::INDEX_NB_UNIQ_VISITORS, Metrics::INDEX_NB_VISITS, Metrics::INDEX_NB_ACTIONS),
* );
* if ($query === false) {
* return;
* }
*
*
* while ($row = $query->fetch()) {
* $uniqueVisitors = $row[Metrics::INDEX_NB_UNIQ_VISITORS];
* $visits = $row[Metrics::INDEX_NB_VISITS];
@ -89,8 +87,8 @@ use Piwik\Tracker\GoalManager;
* $country = $row['location_country'];
* $numEcommerceSales = $row[Metrics::INDEX_GOAL_NB_CONVERSIONS];
* $numVisitsWithEcommerceSales = $row[Metrics::INDEX_GOAL_NB_VISITS_CONVERTED];
* $avgTaxForCountry = $country['avg_tax'];
* $maxShippingForCountry = $country['max_shipping'];
* $avgTaxForCountry = $row['avg_tax'];
* $maxShippingForCountry = $row['max_shipping'];
*
* // ... do something with aggregated data ...
* }
@ -131,15 +129,20 @@ class LogAggregator
/** @var \Piwik\Date */
protected $dateEnd;
/** @var \Piwik\Site */
protected $site;
/** @var int[] */
protected $sites;
/** @var \Piwik\Segment */
protected $segment;
/**
* @var string
*/
private $queryOriginHint = '';
/**
* Constructor.
*
*
* @param \Piwik\ArchiveProcessor\Parameters $params
*/
public function __construct(Parameters $params)
@ -147,30 +150,44 @@ class LogAggregator
$this->dateStart = $params->getDateStart();
$this->dateEnd = $params->getDateEnd();
$this->segment = $params->getSegment();
$this->site = $params->getSite();
$this->sites = $params->getIdSites();
}
public function setQueryOriginHint($nameOfOrigiin)
{
$this->queryOriginHint = $nameOfOrigiin;
}
public function generateQuery($select, $from, $where, $groupBy, $orderBy)
{
$bind = $this->getBindDatetimeSite();
$bind = $this->getGeneralQueryBindParams();
$query = $this->segment->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy);
$select = 'SELECT';
if ($this->queryOriginHint && is_array($query) && 0 === strpos(trim($query['sql']), $select)) {
$query['sql'] = trim($query['sql']);
$query['sql'] = 'SELECT /* ' . $this->queryOriginHint . ' */' . substr($query['sql'], strlen($select));
}
return $query;
}
protected function getVisitsMetricFields()
{
return array(
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_VISIT_TABLE . ".idvisitor)",
Metrics::INDEX_NB_VISITS => "count(*)",
Metrics::INDEX_NB_ACTIONS => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
Metrics::INDEX_MAX_ACTIONS => "max(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
Metrics::INDEX_SUM_VISIT_LENGTH => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_time)",
Metrics::INDEX_BOUNCE_COUNT => "sum(case " . self::LOG_VISIT_TABLE . ".visit_total_actions when 1 then 1 when 0 then 1 else 0 end)",
Metrics::INDEX_NB_VISITS_CONVERTED => "sum(case " . self::LOG_VISIT_TABLE . ".visit_goal_converted when 1 then 1 else 0 end)",
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_VISIT_TABLE . ".idvisitor)",
Metrics::INDEX_NB_UNIQ_FINGERPRINTS => "count(distinct " . self::LOG_VISIT_TABLE . ".config_id)",
Metrics::INDEX_NB_VISITS => "count(*)",
Metrics::INDEX_NB_ACTIONS => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
Metrics::INDEX_MAX_ACTIONS => "max(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
Metrics::INDEX_SUM_VISIT_LENGTH => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_time)",
Metrics::INDEX_BOUNCE_COUNT => "sum(case " . self::LOG_VISIT_TABLE . ".visit_total_actions when 1 then 1 when 0 then 1 else 0 end)",
Metrics::INDEX_NB_VISITS_CONVERTED => "sum(case " . self::LOG_VISIT_TABLE . ".visit_goal_converted when 1 then 1 else 0 end)",
Metrics::INDEX_NB_USERS => "count(distinct " . self::LOG_VISIT_TABLE . ".user_id)",
);
}
static public function getConversionsMetricFields()
public static function getConversionsMetricFields()
{
return array(
Metrics::INDEX_GOAL_NB_CONVERSIONS => "count(*)",
@ -184,12 +201,12 @@ class LogAggregator
);
}
static private function getSqlConversionRevenueSum($field)
private static function getSqlConversionRevenueSum($field)
{
return self::getSqlRevenue('SUM(' . self::LOG_CONVERSION_TABLE . '.' . $field . ')');
}
static public function getSqlRevenue($field)
public static function getSqlRevenue($field)
{
return "ROUND(" . $field . "," . GoalManager::REVENUE_PRECISION . ")";
}
@ -271,7 +288,7 @@ class LogAggregator
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
* @param bool|array $metrics The set of metrics to calculate and return. If false, the query will select
* all of them. The following values can be used:
*
*
* - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
* - {@link Piwik\Metrics::INDEX_NB_VISITS}
* - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
@ -293,52 +310,61 @@ class LogAggregator
$tableName = self::LOG_VISIT_TABLE;
$availableMetrics = $this->getVisitsMetricFields();
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::VISIT_DATETIME_FIELD, $where);
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::VISIT_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
if ($rankingQuery) {
$orderBy = '`' . Metrics::INDEX_NB_VISITS . '` DESC';
}
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
if ($rankingQuery) {
unset($availableMetrics[Metrics::INDEX_MAX_ACTIONS]);
$sumColumns = array_keys($availableMetrics);
if ($metrics) {
$sumColumns = array_intersect($sumColumns, $metrics);
}
$rankingQuery->addColumn($sumColumns, 'sum');
if ($this->isMetricRequested(Metrics::INDEX_MAX_ACTIONS, $metrics)) {
$rankingQuery->addColumn(Metrics::INDEX_MAX_ACTIONS, 'max');
}
return $rankingQuery->execute($query['sql'], $query['bind']);
}
return $this->getDb()->query($query['sql'], $query['bind']);
}
protected function getSelectsMetrics($metricsAvailable, $metricsRequested = false)
{
$selects = array();
foreach ($metricsAvailable as $metricId => $statement) {
if ($this->isMetricRequested($metricId, $metricsRequested)) {
$aliasAs = $this->getSelectAliasAs($metricId);
$aliasAs = $this->getSelectAliasAs($metricId);
$selects[] = $statement . $aliasAs;
}
}
return $selects;
}
protected function getSelectStatement($dimensions, $tableName, $additionalSelects, array $availableMetrics, $requestedMetrics = false)
{
$dimensionsToSelect = $this->getDimensionsToSelect($dimensions, $additionalSelects);
$selects = array_merge(
$this->getSelectDimensions($dimensionsToSelect, $tableName),
$this->getSelectsMetrics($availableMetrics, $requestedMetrics),
!empty($additionalSelects) ? $additionalSelects : array()
);
$select = implode(self::FIELDS_SEPARATOR, $selects);
return $select;
}
@ -355,6 +381,7 @@ class LogAggregator
if (empty($additionalSelects)) {
return $dimensions;
}
$dimensionsToSelect = array();
foreach ($dimensions as $selectAs => $dimension) {
$asAlias = $this->getSelectAliasAs($dimension);
@ -364,6 +391,7 @@ class LogAggregator
}
}
}
$dimensionsToSelect = array_unique($dimensionsToSelect);
return $dimensionsToSelect;
}
@ -382,6 +410,7 @@ class LogAggregator
{
foreach ($dimensions as $selectAs => &$field) {
$selectAsString = $field;
if (!is_numeric($selectAs)) {
$selectAsString = $selectAs;
} else {
@ -390,16 +419,18 @@ class LogAggregator
$selectAsString = $appendSelectAs = false;
}
}
$isKnownField = !in_array($field, array('referrer_data'));
if ($selectAsString == $field
&& $isKnownField
) {
if ($selectAsString == $field && $isKnownField) {
$field = $this->prefixColumn($field, $tableName);
}
if ($appendSelectAs && $selectAsString) {
$field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString);
}
}
return $dimensions;
}
@ -422,7 +453,7 @@ class LogAggregator
protected function isFieldFunctionOrComplexExpression($field)
{
return strpos($field, "(") !== false
|| strpos($field, "CASE") !== false;
|| strpos($field, "CASE") !== false;
}
protected function getSelectAliasAs($metricId)
@ -432,32 +463,50 @@ class LogAggregator
protected function isMetricRequested($metricId, $metricsRequested)
{
return $metricsRequested === false
|| in_array($metricId, $metricsRequested);
// do not process INDEX_NB_UNIQ_FINGERPRINTS unless specifically asked for
if ($metricsRequested === false) {
if ($metricId == Metrics::INDEX_NB_UNIQ_FINGERPRINTS) {
return false;
}
return true;
}
return in_array($metricId, $metricsRequested);
}
protected function getWhereStatement($tableName, $datetimeField, $extraWhere = false)
{
$where = "$tableName.$datetimeField >= ?
AND $tableName.$datetimeField <= ?
AND $tableName.idsite = ?";
AND $tableName.idsite IN (". Common::getSqlStringFieldsArray($this->sites) . ")";
if (!empty($extraWhere)) {
$extraWhere = sprintf($extraWhere, $tableName, $tableName);
$where .= ' AND ' . $extraWhere;
$where .= ' AND ' . $extraWhere;
}
return $where;
}
protected function getGroupByStatement($dimensions, $tableName)
{
$dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false);
$groupBy = implode(", ", $dimensions);
$groupBy = implode(", ", $dimensions);
return $groupBy;
}
protected function getBindDatetimeSite()
/**
* Returns general bind parameters for all log aggregation queries. This includes the datetime
* start of entities, datetime end of entities and IDs of all sites.
*
* @return array
*/
protected function getGeneralQueryBindParams()
{
return array($this->dateStart->getDateStartUTC(), $this->dateEnd->getDateEndUTC(), $this->site->getId());
$bind = array($this->dateStart->getDateStartUTC(), $this->dateEnd->getDateEndUTC());
$bind = array_merge($bind, $this->sites);
return $bind;
}
/**
@ -487,7 +536,7 @@ class LogAggregator
*
* @param string $dimension One or more **log\_conversion\_item** columns to group aggregated data by.
* Eg, `'idaction_sku'` or `'idaction_sku, idaction_category'`.
* @return Zend_Db_Statement A statement object that can be used to iterate through the query's
* @return \Zend_Db_Statement A statement object that can be used to iterate through the query's
* result set. See [above](#queryEcommerceItems-result-set) to learn more
* about what this query selects.
* @api
@ -547,7 +596,7 @@ class LogAggregator
array(
'log_conversion_item.server_time >= ?',
'log_conversion_item.server_time <= ?',
'log_conversion_item.idsite = ?',
'log_conversion_item.idsite IN (' . Common::getSqlStringFieldsArray($this->sites) . ')',
'log_conversion_item.deleted = 0'
)
),
@ -593,7 +642,7 @@ class LogAggregator
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
* @param bool|array $metrics The set of metrics to calculate and return. If `false`, the query will select
* all of them. The following values can be used:
*
*
* - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
* - {@link Piwik\Metrics::INDEX_NB_VISITS}
* - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
@ -604,7 +653,7 @@ class LogAggregator
* log_action should be joined on. The table alias used for each join
* is `"log_action$i"` where `$i` is the index of the column in this
* array.
*
*
* If a string is used for this parameter, the table alias is not
* suffixed (since there is only one column).
* @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
@ -617,9 +666,9 @@ class LogAggregator
$tableName = self::LOG_ACTIONS_TABLE;
$availableMetrics = $this->getActionsMetricFields();
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::ACTION_DATETIME_FIELD, $where);
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::ACTION_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
@ -631,12 +680,14 @@ class LogAggregator
foreach ($joinLogActionOnColumn as $i => $joinColumn) {
$tableAlias = 'log_action' . ($multiJoin ? $i + 1 : '');
if (strpos($joinColumn, ' ') === false) {
$joinOn = $tableAlias . '.idaction = ' . $tableName . '.' . $joinColumn;
} else {
// more complex join column like IF(...)
// more complex join column like if (...)
$joinOn = $tableAlias . '.idaction = ' . $joinColumn;
}
$from[] = array(
'table' => 'log_action',
'tableAlias' => $tableAlias,
@ -656,7 +707,9 @@ class LogAggregator
if ($metrics) {
$sumColumns = array_intersect($sumColumns, $metrics);
}
$rankingQuery->addColumn($sumColumns, 'sum');
return $rankingQuery->execute($query['sql'], $query['bind']);
}
@ -665,7 +718,7 @@ class LogAggregator
protected function getActionsMetricFields()
{
return $availableMetrics = array(
return array(
Metrics::INDEX_NB_VISITS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisit)",
Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisitor)",
Metrics::INDEX_NB_ACTIONS => "count(*)",
@ -691,32 +744,32 @@ class LogAggregator
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL}**: The total cost of all ecommerce items sold
* within these conversions. This value does not
* include tax, shipping or any applied discount.
*
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX}**: The total tax applied to every transaction in these
* conversions.
*
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING}**: The total shipping cost for every transaction
* in these conversions.
*
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT}**: The total discount applied to every transaction
* in these conversions.
*
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
* - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_ITEMS}**: The total number of ecommerce items sold in each transaction
* in these conversions.
*
*
* _This metric is only applicable to the special
* **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
*
*
* Additional data can be selected through the `$additionalSelects` parameter.
*
*
* _Note: This method will only query the **log_conversion** table. Other tables cannot be joined
* using this method._
*
@ -726,21 +779,22 @@ class LogAggregator
* @param bool|string $where An optional SQL expression used in the SQL's **WHERE** clause.
* @param array $additionalSelects Additional SELECT fields that are not included in the group by
* clause. These can be aggregate expressions, eg, `SUM(somecol)`.
* @return Zend_Db_Statement
* @return \Zend_Db_Statement
*/
public function queryConversionsByDimension($dimensions = array(), $where = false, $additionalSelects = array())
{
$dimensions = array_merge(array(self::IDGOAL_FIELD), $dimensions);
$tableName = self::LOG_CONVERSION_TABLE;
$availableMetrics = $this->getConversionsMetricFields();
$tableName = self::LOG_CONVERSION_TABLE;
$select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::CONVERSION_DATETIME_FIELD, $where);
$from = array($tableName);
$where = $this->getWhereStatement($tableName, self::CONVERSION_DATETIME_FIELD, $where);
$groupBy = $this->getGroupByStatement($dimensions, $tableName);
$orderBy = false;
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
$query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
return $this->getDb()->query($query['sql'], $query['bind']);
}
@ -750,9 +804,9 @@ class LogAggregator
*
* **Note:** The result of this function is meant for use in the `$additionalSelects` parameter
* in one of the query... methods (for example {@link queryVisitsByDimension()}).
*
*
* **Example**
*
*
* // summarize one column
* $visitTotalActionsRanges = array(
* array(1, 1),
@ -760,7 +814,7 @@ class LogAggregator
* array(10)
* );
* $selects = LogAggregator::getSelectsFromRangedColumn('visit_total_actions', $visitTotalActionsRanges, 'log_visit', 'vta');
*
*
* // summarize another column in the same request
* $visitCountVisitsRanges = array(
* array(1, 1),
@ -771,17 +825,17 @@ class LogAggregator
* $selects,
* LogAggregator::getSelectsFromRangedColumn('visitor_count_visits', $visitCountVisitsRanges, 'log_visit', 'vcv')
* );
*
*
* // perform the query
* $logAggregator = // get the LogAggregator somehow
* $query = $logAggregator->queryVisitsByDimension($dimensions = array(), $where = false, $selects);
* $tableSummary = $query->fetch();
*
*
* $numberOfVisitsWithOneAction = $tableSummary['vta0'];
* $numberOfVisitsBetweenTwoAnd10 = $tableSummary['vta1'];
*
*
* $numberOfVisitsWithVisitCountOfOne = $tableSummary['vcv0'];
*
*
* @param string $column The name of a column in `$table` that will be summarized.
* @param array $ranges The array of ranges over which the data in the table
* will be summarized. For example,
@ -817,14 +871,16 @@ class LogAggregator
{
$selects = array();
$extraCondition = '';
if ($restrictToReturningVisitors) {
// extra condition for the SQL SELECT that makes sure only returning visits are counted
// when creating the 'days since last visit' report
$extraCondition = 'and log_visit.visitor_returning = 1';
$extraSelect = "sum(case when log_visit.visitor_returning = 0 then 1 else 0 end) "
. " as `" . $selectColumnPrefix . 'General_NewVisits' . "`";
$extraSelect = "sum(case when log_visit.visitor_returning = 0 then 1 else 0 end) "
. " as `" . $selectColumnPrefix . 'General_NewVisits' . "`";
$selects[] = $extraSelect;
}
foreach ($ranges as $gap) {
if (count($gap) == 2) {
$lowerBound = $gap[0];
@ -833,12 +889,11 @@ class LogAggregator
$selectAs = "$selectColumnPrefix$lowerBound-$upperBound";
$selects[] = "sum(case when $table.$column between $lowerBound and $upperBound $extraCondition" .
" then 1 else 0 end) as `$selectAs`";
" then 1 else 0 end) as `$selectAs`";
} else {
$lowerBound = $gap[0];
$selectAs = $selectColumnPrefix . ($lowerBound + 1) . urlencode('+');
$selectAs = $selectColumnPrefix . ($lowerBound + 1) . urlencode('+');
$selects[] = "sum(case when $table.$column > $lowerBound $extraCondition then 1 else 0 end) as `$selectAs`";
}
}
@ -859,9 +914,10 @@ class LogAggregator
* value is used.
* @return array
*/
static public function makeArrayOneColumn($row, $columnName, $lookForThisPrefix = false)
public static function makeArrayOneColumn($row, $columnName, $lookForThisPrefix = false)
{
$cleanRow = array();
foreach ($row as $label => $count) {
if (empty($lookForThisPrefix)
|| strpos($label, $lookForThisPrefix) === 0
@ -870,6 +926,7 @@ class LogAggregator
$cleanRow[$cleanLabel] = array($columnName => $count);
}
}
return $cleanRow;
}

View file

@ -0,0 +1,391 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Exception;
use Piwik\Common;
use Piwik\Segment\SegmentExpression;
/**
 * Builds the raw-log SELECT queries (SQL string + bind values) used for segmentation
 * and log aggregation. Takes a list of log_* tables, works out the LEFT JOINs between
 * them, merges in an optional segment's WHERE/bind, and — when actions must be joined
 * onto visits — wraps the query in a sub-select so grouping by visit stays correct.
 */
class LogQueryBuilder
{
    /**
     * Assembles the final SELECT query for a log data request.
     *
     * @param SegmentExpression $segmentExpression Segment to apply; may be empty.
     * @param string $select SELECT field list.
     * @param array|string $from Table name(s); entries may be arrays with explicit 'table'/'tableAlias'/'joinOn'.
     * @param string|bool $where WHERE clause (without segment conditions), or false.
     * @param array $bind Bind parameters for $where.
     * @param string|bool $groupBy GROUP BY clause, or false.
     * @param string|bool $orderBy ORDER BY clause, or false.
     * @param string|int|bool $limit LIMIT value ("5" or "10, 5"), or false.
     * @return array array('sql' => string, 'bind' => array)
     */
    public function getSelectQueryString(SegmentExpression $segmentExpression, $select, $from, $where, $bind, $groupBy,
                                         $orderBy, $limit)
    {
        if (!is_array($from)) {
            $from = array($from);
        }

        // remember the caller-supplied table list before the segment/join logic mutates it
        $fromInitially = $from;

        if (!$segmentExpression->isEmpty()) {
            // the segment may add tables to $from and contributes its own WHERE + binds
            $segmentExpression->parseSubExpressionsIntoSqlExpressions($from);
            $segmentSql = $segmentExpression->getSql();
            $where = $this->getWhereMatchBoth($where, $segmentSql['where']);
            $bind = array_merge($bind, $segmentSql['bind']);
        }

        $joins = $this->generateJoinsString($from);
        $joinWithSubSelect = $joins['joinWithSubSelect'];
        $from = $joins['sql'];

        // hack for https://github.com/piwik/piwik/issues/9194#issuecomment-164321612
        // when a segment forced log_link_visit_action into a conversion query grouped by
        // idgoal, group the inner query by (idvisit, idgoal, buster) to avoid double counting
        $useSpecialConversionGroupBy = (!empty($segmentSql)
            && strpos($groupBy, 'log_conversion.idgoal') !== false
            && $fromInitially == array('log_conversion')
            && strpos($from, 'log_link_visit_action') !== false);

        if ($useSpecialConversionGroupBy) {
            $innerGroupBy = "CONCAT(log_conversion.idvisit, '_' , log_conversion.idgoal, '_', log_conversion.buster)";
            $sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit, $innerGroupBy);
        } elseif ($joinWithSubSelect) {
            $sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit);
        } else {
            $sql = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit);
        }
        return array(
            'sql' => $sql,
            'bind' => $bind
        );
    }

    /**
     * Returns true if $tables already contains an explicit (array-form) entry for
     * $tableToFind with exactly the join condition $joinToFind (and no custom alias).
     *
     * @param string $tableToFind
     * @param string $joinToFind
     * @param array $tables
     * @return bool
     */
    private function hasJoinedTableAlreadyManually($tableToFind, $joinToFind, $tables)
    {
        foreach ($tables as $index => $table) {
            if (is_array($table)
                && !empty($table['table'])
                && $table['table'] === $tableToFind
                && (!isset($table['tableAlias']) || $table['tableAlias'] === $tableToFind)
                && isset($table['joinOn']) && $table['joinOn'] === $joinToFind) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the index of the first explicit (array-form) entry for $tableToFind that
     * uses no custom alias, or null implicitly when no such entry exists.
     *
     * @param string $tableToFind
     * @param array $tables
     * @return int|string|null
     */
    private function findIndexOfManuallyAddedTable($tableToFind, $tables)
    {
        foreach ($tables as $index => $table) {
            if (is_array($table)
                && !empty($table['table'])
                && $table['table'] === $tableToFind
                && (!isset($table['tableAlias']) || $table['tableAlias'] === $tableToFind)) {
                return $index;
            }
        }
    }

    /**
     * Returns true if $tables contains an explicit (array-form) entry for $tableToFind.
     *
     * @param string $tableToFind
     * @param array $tables
     * @return bool
     */
    private function hasTableAddedManually($tableToFind, $tables)
    {
        $table = $this->findIndexOfManuallyAddedTable($tableToFind, $tables);

        return isset($table);
    }

    /**
     * Generate the join sql based on the needed tables
     * @param array $tables tables to join
     * @throws Exception if tables can't be joined
     * @return array
     */
    private function generateJoinsString(&$tables)
    {
        $knownTables = array("log_action", "log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item");
        $visitsAvailable = $linkVisitActionsTableAvailable = $conversionsAvailable = $conversionItemAvailable = $actionsTableAvailable = false;

        // default join used when log_action is needed alongside log_link_visit_action
        $defaultLogActionJoin = "log_link_visit_action.idaction_url = log_action.idaction";

        $joinWithSubSelect = false;
        $sql = '';

        // make sure the tables are joined in the right order
        // base table first, then action before conversion
        // this way, conversions can be left joined on idvisit
        $actionIndex = array_search("log_link_visit_action", $tables);
        $conversionIndex = array_search("log_conversion", $tables);

        if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) {
            $tables[$actionIndex] = "log_conversion";
            $tables[$conversionIndex] = "log_link_visit_action";
        }

        // same as above: action before visit
        $actionIndex = array_search("log_link_visit_action", $tables);
        $visitIndex = array_search("log_visit", $tables);
        if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) {
            $tables[$actionIndex] = "log_visit";
            $tables[$visitIndex] = "log_link_visit_action";
        }

        // we need to add log_link_visit_action dynamically to join eg visit with action
        $linkVisitAction = array_search("log_link_visit_action", $tables);
        $actionIndex = array_search("log_action", $tables);

        if ($linkVisitAction === false && $actionIndex > 0) {
            $tables[] = "log_link_visit_action";
        }

        if ($actionIndex > 0
            && $this->hasTableAddedManually('log_action', $tables)
            && !$this->hasJoinedTableAlreadyManually('log_action', $defaultLogActionJoin, $tables)) {
            // we cannot join the same table with same alias twice, therefore we need to combine the join via AND
            $tableIndex = $this->findIndexOfManuallyAddedTable('log_action', $tables);
            $defaultLogActionJoin = '(' . $tables[$tableIndex]['joinOn'] . ' AND ' . $defaultLogActionJoin . ')';
            unset($tables[$tableIndex]);
        }

        // keep log_link_visit_action before log_action so the action join condition resolves
        $linkVisitAction = array_search("log_link_visit_action", $tables);
        $actionIndex = array_search("log_action", $tables);

        if ($linkVisitAction > 0 && $actionIndex > 0 && $linkVisitAction > $actionIndex) {
            $tables[$actionIndex] = "log_link_visit_action";
            $tables[$linkVisitAction] = "log_action";
        }

        foreach ($tables as $i => $table) {
            if (is_array($table)) {
                // join condition provided
                $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
                $sql .= "
				LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias
                    . " ON " . $table['joinOn'];
                continue;
            }

            if (!in_array($table, $knownTables)) {
                throw new Exception("Table '$table' can't be used for segmentation");
            }

            $tableSql = Common::prefixTable($table) . " AS $table";

            if ($i == 0) {
                // first table
                $sql .= $tableSql;
            } else {
                if ($linkVisitActionsTableAvailable && $table === 'log_action') {
                    $join = $defaultLogActionJoin;

                    if ($this->hasJoinedTableAlreadyManually($table, $join, $tables)) {
                        $actionsTableAvailable = true;
                        continue;
                    }
                } elseif ($linkVisitActionsTableAvailable && $table == "log_conversion") {
                    // have actions, need conversions => join on idvisit
                    $join = "log_conversion.idvisit = log_link_visit_action.idvisit";
                } elseif ($linkVisitActionsTableAvailable && $table == "log_visit") {
                    // have actions, need visits => join on idvisit
                    $join = "log_visit.idvisit = log_link_visit_action.idvisit";

                    if ($this->hasJoinedTableAlreadyManually($table, $join, $tables)) {
                        $visitsAvailable = true;
                        continue;
                    }
                } elseif ($visitsAvailable && $table == "log_link_visit_action") {
                    // have visits, need actions => we have to use a more complex join
                    // we don't handle this here, we just return joinWithSubSelect=true in this case
                    $joinWithSubSelect = true;
                    $join = "log_link_visit_action.idvisit = log_visit.idvisit";

                    if ($this->hasJoinedTableAlreadyManually($table, $join, $tables)) {
                        $linkVisitActionsTableAvailable = true;
                        continue;
                    }
                } elseif ($conversionsAvailable && $table == "log_link_visit_action") {
                    // have conversions, need actions => join on idvisit
                    $join = "log_conversion.idvisit = log_link_visit_action.idvisit";
                } elseif (($visitsAvailable && $table == "log_conversion")
                    || ($conversionsAvailable && $table == "log_visit")
                ) {
                    // have visits, need conversion (or vice versa) => join on idvisit
                    // notice that joining conversions on visits has lower priority than joining it on actions
                    $join = "log_conversion.idvisit = log_visit.idvisit";

                    // if conversions are joined on visits, we need a complex join
                    if ($table == "log_conversion") {
                        $joinWithSubSelect = true;
                    }
                } elseif ($conversionItemAvailable && $table === 'log_visit') {
                    $join = "log_conversion_item.idvisit = log_visit.idvisit";
                } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') {
                    $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit";
                } elseif ($conversionItemAvailable && $table === 'log_conversion') {
                    $join = "log_conversion_item.idvisit = log_conversion.idvisit";
                } else {
                    throw new Exception("Table '$table' can't be joined for segmentation");
                }

                // the join sql the default way
                $sql .= "
				LEFT JOIN $tableSql ON $join";
            }

            // remember which tables are available
            $visitsAvailable = ($visitsAvailable || $table == "log_visit");
            $linkVisitActionsTableAvailable = ($linkVisitActionsTableAvailable || $table == "log_link_visit_action");
            $actionsTableAvailable = ($actionsTableAvailable || $table == "log_action");
            $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion");
            $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item");
        }

        $return = array(
            'sql' => $sql,
            'joinWithSubSelect' => $joinWithSubSelect
        );
        return $return;
    }

    /**
     * Build a select query where actions have to be joined on visits (or conversions)
     * In this case, the query gets wrapped in another query so that grouping by visit is possible
     * @param string $select
     * @param string $from
     * @param string $where
     * @param string $groupBy
     * @param string $orderBy
     * @param string $limit
     * @param null|string $innerGroupBy If given, this inner group by will be used. If not, we try to detect one
     * @throws Exception
     * @return string
     */
    private function buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit, $innerGroupBy = null)
    {
        // collect the "table.column" expressions referenced by the outer SELECT;
        // they must be selected by the inner query as well
        $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)";
        preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches);
        $neededFields = array_unique($matches[0]);

        if (count($neededFields) == 0) {
            throw new Exception("No needed fields found in select expression. "
                . "Please use a table prefix.");
        }

        preg_match_all("/". $matchTables . "/", $from, $matchesFrom);

        $innerSelect = implode(", \n", $neededFields);
        $innerFrom = $from;
        $innerWhere = $where;

        $innerLimit = $limit;
        if (!isset($innerGroupBy) && in_array('log_visit', $matchesFrom[1])) {
            // default: deduplicate rows per visit in the inner query
            $innerGroupBy = "log_visit.idvisit";
        } elseif (!isset($innerGroupBy)) {
            throw new Exception('Cannot use subselect for join as no group by rule is specified');
        }

        $innerOrderBy = "NULL";
        if ($innerLimit && $orderBy) {
            // only When LIMITing we can apply to the inner query the same ORDER BY as the parent query
            $innerOrderBy = $orderBy;
        }
        if ($innerLimit) {
            // When LIMITing, no need to GROUP BY (GROUPing by is done before the LIMIT which is super slow when large amount of rows is matched)
            $innerGroupBy = false;
        }

        $innerQuery = $this->buildSelectQuery($innerSelect, $innerFrom, $innerWhere, $innerGroupBy, $innerOrderBy, $innerLimit);

        // the outer query reads from the sub-select alias, so rewrite every
        // "table." prefix in SELECT / ORDER BY / GROUP BY to "log_inner."
        $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select);
        $from = "
        (
            $innerQuery
        ) AS log_inner";
        $where = false;
        $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy);
        $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy);

        $query = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit);
        return $query;
    }

    /**
     * Build select query the normal way
     *
     * @param string $select fieldlist to be selected
     * @param string $from tablelist to select from
     * @param string $where where clause
     * @param string $groupBy group by clause
     * @param string $orderBy order by clause
     * @param string|int $limit limit by clause eg '5' for Limit 5 Offset 0 or '10, 5' for Limit 5 Offset 10
     * @return string
     */
    private function buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit)
    {
        $sql = "
			SELECT
				$select
			FROM
				$from";

        if ($where) {
            $sql .= "
			WHERE
				$where";
        }

        if ($groupBy) {
            $sql .= "
			GROUP BY
				$groupBy";
        }

        if ($orderBy) {
            $sql .= "
			ORDER BY
				$orderBy";
        }

        $sql = $this->appendLimitClauseToQuery($sql, $limit);

        return $sql;
    }

    /**
     * Appends a LIMIT (or LIMIT offset, count) clause to $sql. The limit values are
     * forced to integers via %d / (int) casts to prevent SQL injection through $limit.
     *
     * @param string $sql
     * @param string|int $limit "5" or "10, 5" (offset, count); values < 1 append nothing
     * @return string
     */
    private function appendLimitClauseToQuery($sql, $limit)
    {
        $limitParts = explode(',', (string) $limit);
        $isLimitWithOffset = 2 === count($limitParts);

        if ($isLimitWithOffset) {
            // $limit = "10, 5". We would not have to do this but we do to prevent possible injections.
            $offset = trim($limitParts[0]);
            $limit = trim($limitParts[1]);
            $sql .= sprintf(' LIMIT %d, %d', $offset, $limit);
        } else {
            // $limit = "5"
            $limit = (int)$limit;
            if ($limit >= 1) {
                $sql .= " LIMIT $limit";
            }
        }

        return $sql;
    }

    /**
     * Combines the base WHERE clause with the segment's WHERE clause via AND.
     * Returns whichever clause is non-empty when only one of them is set.
     *
     * @param string|bool $where base WHERE clause (may be empty/false)
     * @param string $segmentWhere WHERE clause generated from the segment
     * @return string
     * @throws \Exception if both clauses are empty
     */
    protected function getWhereMatchBoth($where, $segmentWhere)
    {
        if (empty($segmentWhere) && empty($where)) {
            throw new \Exception("Segment where clause should be non empty.");
        }

        if (empty($segmentWhere)) {
            return $where;
        }

        if (empty($where)) {
            return $segmentWhere;
        }

        return "( $where )
                AND
                ($segmentWhere)";
    }
}

View file

@ -0,0 +1,358 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Exception;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Common;
use Piwik\Container\StaticContainer;
use Piwik\Db;
use Piwik\DbHelper;
use Piwik\Period;
use Piwik\Segment;
use Piwik\Sequence;
use Psr\Log\LoggerInterface;
/**
* Cleans up outdated archives
*
* @package Piwik\DataAccess
*/
class Model
{
/**
* @var LoggerInterface
*/
private $logger;
public function __construct(LoggerInterface $logger = null)
{
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
}
    /**
     * Returns the archives IDs that have already been invalidated and have been since re-processed.
     *
     * These archives { archive name (includes segment hash) , idsite, date, period } will be deleted.
     *
     * @param string $archiveTable Prefixed archive table name to scan.
     * @param array $idSites Site IDs whose archives should be considered.
     * @return array idarchive values that are invalidated AND superseded by a newer usable archive.
     * @throws Exception
     */
    public function getInvalidatedArchiveIdsSafeToDelete($archiveTable, array $idSites)
    {
        // Raise GROUP_CONCAT's length limit so the per-group "idarchive.value" list is
        // less likely to be truncated; failing to set it is non-fatal (logged only).
        try {
            Db::get()->query('SET SESSION group_concat_max_len=' . (128 * 1024));
        } catch (\Exception $ex) {
            $this->logger->info("Could not set group_concat_max_len MySQL session variable.");
        }

        // cast to int before inlining the IDs into the SQL below (no bind params used)
        $idSites = array_map(function ($v) { return (int)$v; }, $idSites);

        // one row per { idsite, date range, period, archive name } group, with all
        // matching archives concatenated as "idarchive.value" pairs, newest first
        $sql = "SELECT idsite, date1, date2, period, name,
                       GROUP_CONCAT(idarchive, '.', value ORDER BY ts_archived DESC) as archives
                  FROM `$archiveTable`
                 WHERE name LIKE 'done%'
                   AND value IN (" . ArchiveWriter::DONE_INVALIDATED . ','
                                   . ArchiveWriter::DONE_OK . ','
                                   . ArchiveWriter::DONE_OK_TEMPORARY . ")
                   AND idsite IN (" . implode(',', $idSites) . ")
                 GROUP BY idsite, date1, date2, period, name";

        $archiveIds = array();

        $rows = Db::fetchAll($sql);
        foreach ($rows as $row) {
            $duplicateArchives = explode(',', $row['archives']);
            // newest archive of the group (pairs are ordered by ts_archived DESC)
            $firstArchive = array_shift($duplicateArchives);
            list($firstArchiveId, $firstArchiveValue) = explode('.', $firstArchive);

            // if the first archive (ie, the newest) is an 'ok' or 'ok temporary' archive, then
            // all invalidated archives after it can be deleted
            if ($firstArchiveValue == ArchiveWriter::DONE_OK
                || $firstArchiveValue == ArchiveWriter::DONE_OK_TEMPORARY
            ) {
                foreach ($duplicateArchives as $pair) {
                    // a pair without '.' means GROUP_CONCAT hit group_concat_max_len and
                    // truncated this group's list; stop processing the incomplete data
                    if (strpos($pair, '.') === false) {
                        $this->logger->info("GROUP_CONCAT cut off the query result, you may have to purge archives again.");
                        break;
                    }

                    list($idarchive, $value) = explode('.', $pair);
                    if ($value == ArchiveWriter::DONE_INVALIDATED) {
                        $archiveIds[] = $idarchive;
                    }
                }
            }
        }

        return $archiveIds;
    }
/**
* @param string $archiveTable Prefixed table name
* @param int[] $idSites
* @param string[][] $datesByPeriodType
* @param Segment $segment
* @return \Zend_Db_Statement
* @throws Exception
*/
public function updateArchiveAsInvalidated($archiveTable, $idSites, $datesByPeriodType, Segment $segment = null)
{
$idSites = array_map('intval', $idSites);
$bind = array();
$periodConditions = array();
foreach ($datesByPeriodType as $periodType => $dates) {
$dateConditions = array();
foreach ($dates as $date) {
$dateConditions[] = "(date1 <= ? AND ? <= date2)";
$bind[] = $date;
$bind[] = $date;
}
$dateConditionsSql = implode(" OR ", $dateConditions);
if (empty($periodType)
|| $periodType == Period\Day::PERIOD_ID
) {
// invalidate all periods if no period supplied or period is day
$periodConditions[] = "($dateConditionsSql)";
} else if ($periodType == Period\Range::PERIOD_ID) {
$periodConditions[] = "(period = " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))";
} else {
// for non-day periods, invalidate greater periods, but not range periods
$periodConditions[] = "(period >= " . (int)$periodType . " AND period < " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))";
}
}
if ($segment) {
$nameCondition = "name LIKE '" . Rules::getDoneFlagArchiveContainsAllPlugins($segment) . "%'";
} else {
$nameCondition = "name LIKE 'done%'";
}
$sql = "UPDATE $archiveTable SET value = " . ArchiveWriter::DONE_INVALIDATED
. " WHERE $nameCondition
AND idsite IN (" . implode(", ", $idSites) . ")
AND (" . implode(" OR ", $periodConditions) . ")";
return Db::query($sql, $bind);
}
public function getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan)
{
$query = "SELECT idarchive FROM " . $archiveTable . "
WHERE name LIKE 'done%'
AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . "
AND ts_archived < ?)
OR value = " . ArchiveWriter::DONE_ERROR . ")";
return Db::fetchAll($query, array($purgeArchivesOlderThan));
}
public function deleteArchivesWithPeriod($numericTable, $blobTable, $period, $date)
{
$query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?";
$bind = array($period, $date);
$queryObj = Db::query(sprintf($query, $numericTable), $bind);
$deletedRows = $queryObj->rowCount();
try {
$queryObj = Db::query(sprintf($query, $blobTable), $bind);
$deletedRows += $queryObj->rowCount();
} catch (Exception $e) {
// Individual blob tables could be missing
$this->logger->debug("Unable to delete archives by period from {blobTable}.", array(
'blobTable' => $blobTable,
'exception' => $e,
));
}
return $deletedRows;
}
public function deleteArchiveIds($numericTable, $blobTable, $idsToDelete)
{
$idsToDelete = array_values($idsToDelete);
$query = "DELETE FROM %s WHERE idarchive IN (" . Common::getSqlStringFieldsArray($idsToDelete) . ")";
$queryObj = Db::query(sprintf($query, $numericTable), $idsToDelete);
$deletedRows = $queryObj->rowCount();
try {
$queryObj = Db::query(sprintf($query, $blobTable), $idsToDelete);
$deletedRows += $queryObj->rowCount();
} catch (Exception $e) {
// Individual blob tables could be missing
$this->logger->debug("Unable to delete archive IDs from {blobTable}.", array(
'blobTable' => $blobTable,
'exception' => $e,
));
}
return $deletedRows;
}
public function getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues)
{
$bindSQL = array($idSite,
$dateStartIso,
$dateEndIso,
$period,
);
$timeStampWhere = '';
if ($minDatetimeIsoArchiveProcessedUTC) {
$timeStampWhere = " AND ts_archived >= ? ";
$bindSQL[] = $minDatetimeIsoArchiveProcessedUTC;
}
$sqlWhereArchiveName = self::getNameCondition($doneFlags, $doneFlagValues);
$sqlQuery = "SELECT idarchive, value, name, date1 as startDate FROM $numericTable
WHERE idsite = ?
AND date1 = ?
AND date2 = ?
AND period = ?
AND ( ($sqlWhereArchiveName)
OR name = '" . ArchiveSelector::NB_VISITS_RECORD_LOOKED_UP . "'
OR name = '" . ArchiveSelector::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "')
$timeStampWhere
ORDER BY idarchive DESC";
$results = Db::fetchAll($sqlQuery, $bindSQL);
return $results;
}
public function createArchiveTable($tableName, $tableNamePrefix)
{
$db = Db::get();
$sql = DbHelper::getTableCreateSql($tableNamePrefix);
// replace table name template by real name
$tableNamePrefix = Common::prefixTable($tableNamePrefix);
$sql = str_replace($tableNamePrefix, $tableName, $sql);
try {
$db->query($sql);
} catch (Exception $e) {
// accept mysql error 1050: table already exists, throw otherwise
if (!$db->isErrNo($e, '1050')) {
throw $e;
}
}
try {
if (ArchiveTableCreator::NUMERIC_TABLE === ArchiveTableCreator::getTypeFromTableName($tableName)) {
$sequence = new Sequence($tableName);
$sequence->create();
}
} catch (Exception $e) {
}
}
public function allocateNewArchiveId($numericTable)
{
$sequence = new Sequence($numericTable);
try {
$idarchive = $sequence->getNextId();
} catch (Exception $e) {
// edge case: sequence was not found, create it now
$sequence->create();
$idarchive = $sequence->getNextId();
}
return $idarchive;
}
public function deletePreviousArchiveStatus($numericTable, $archiveId, $doneFlag)
{
$tableWithoutLeadingPrefix = $numericTable;
$lenNumericTableWithoutPrefix = strlen('archive_numeric_MM_YYYY');
if (strlen($numericTable) >= $lenNumericTableWithoutPrefix) {
$tableWithoutLeadingPrefix = substr($numericTable, strlen($numericTable) - $lenNumericTableWithoutPrefix);
// we need to make sure lock name is less than 64 characters see https://github.com/piwik/piwik/issues/9131
}
$dbLockName = "rmPrevArchiveStatus.$tableWithoutLeadingPrefix.$archiveId";
// without advisory lock here, the DELETE would acquire Exclusive Lock
$this->acquireArchiveTableLock($dbLockName);
Db::query("DELETE FROM $numericTable WHERE idarchive = ? AND (name = '" . $doneFlag . "')",
array($archiveId)
);
$this->releaseArchiveTableLock($dbLockName);
}
public function insertRecord($tableName, $fields, $record, $name, $value)
{
// duplicate idarchives are Ignored, see https://github.com/piwik/piwik/issues/987
$query = "INSERT IGNORE INTO " . $tableName . " (" . implode(", ", $fields) . ")
VALUES (?,?,?,?,?,?,?,?)";
$bindSql = $record;
$bindSql[] = $name;
$bindSql[] = $value;
Db::query($query, $bindSql);
return true;
}
/**
* Returns the site IDs for invalidated archives in an archive table.
*
* @param string $numericTable The numeric table to search through.
* @return int[]
*/
public function getSitesWithInvalidatedArchive($numericTable)
{
$rows = Db::fetchAll("SELECT DISTINCT idsite FROM `$numericTable` WHERE name LIKE 'done%' AND value = " . ArchiveWriter::DONE_INVALIDATED);
$result = array();
foreach ($rows as $row) {
$result[] = $row['idsite'];
}
return $result;
}
/**
* Returns the SQL condition used to find successfully completed archives that
* this instance is querying for.
*/
private static function getNameCondition($doneFlags, $possibleValues)
{
$allDoneFlags = "'" . implode("','", $doneFlags) . "'";
// create the SQL to find archives that are DONE
return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
}
protected function acquireArchiveTableLock($dbLockName)
{
if (Db::getDbLock($dbLockName, $maxRetries = 30) === false) {
throw new Exception("Cannot get named lock $dbLockName.");
}
}
protected function releaseArchiveTableLock($dbLockName)
{
Db::releaseDbLock($dbLockName);
}
}

View file

@ -0,0 +1,402 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\DataAccess;
use Piwik\Common;
use Piwik\Container\StaticContainer;
use Piwik\Db;
use Piwik\Plugin\Dimension\DimensionMetadataProvider;
/**
* DAO that queries log tables.
*/
class RawLogDao
{
    // name (unprefixed) of the temporary table used while purging unused log_action rows
    const DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';

    /**
     * @var DimensionMetadataProvider
     */
    private $dimensionMetadataProvider;

    /**
     * @param DimensionMetadataProvider|null $provider Falls back to the container's
     *                                                 provider when omitted.
     */
    public function __construct(DimensionMetadataProvider $provider = null)
    {
        $this->dimensionMetadataProvider = $provider ?: StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider');
    }

    /**
     * Updates the given columns of one log_visit row.
     *
     * @param array $values Maps column names to new values.
     * @param string $idVisit
     */
    public function updateVisits(array $values, $idVisit)
    {
        $sql = "UPDATE " . Common::prefixTable('log_visit')
            . " SET " . $this->getColumnSetExpressions(array_keys($values))
            . " WHERE idvisit = ?";

        $this->update($sql, $values, $idVisit);
    }

    /**
     * Updates the given columns of the log_conversion rows of one visit.
     *
     * @param array $values Maps column names to new values.
     * @param string $idVisit
     */
    public function updateConversions(array $values, $idVisit)
    {
        $sql = "UPDATE " . Common::prefixTable('log_conversion')
            . " SET " . $this->getColumnSetExpressions(array_keys($values))
            . " WHERE idvisit = ?";

        $this->update($sql, $values, $idVisit);
    }

    /**
     * Counts visits whose last action time falls within [$from, $to).
     *
     * @param string $from Inclusive lower datetime bound.
     * @param string $to Exclusive upper datetime bound.
     * @return int
     */
    public function countVisitsWithDatesLimit($from, $to)
    {
        $sql = "SELECT COUNT(*) AS num_rows"
            . " FROM " . Common::prefixTable('log_visit')
            . " WHERE visit_last_action_time >= ? AND visit_last_action_time < ?";

        $bind = array($from, $to);

        return (int) Db::fetchOne($sql, $bind);
    }

    /**
     * Iterates over logs in a log table in chunks. Parameters to this function are as backend agnostic
     * as possible w/o dramatically increasing code complexity.
     *
     * @param string $logTable The log table name. Unprefixed, eg, `log_visit`.
     * @param string[] $fields The columns to select for each row.
     * @param array[] $conditions An array describing the conditions logs must match in the query. Translates to
     *                            the WHERE part of a SELECT statement. Each element must contain three elements:
     *
     *                            * the column name
     *                            * the operator (ie, '=', '<>', '<', etc.)
     *                            * the operand (ie, a value)
     *
     *                            The elements are AND-ed together.
     *
     *                            Example:
     *
     *                            ```
     *                            array(
     *                                array('visit_first_action_time', '>=', ...),
     *                                array('visit_first_action_time', '<', ...)
     *                            )
     *                            ```
     * @param int $iterationStep The number of rows to query at a time.
     * @param callable $callback The callback that processes each chunk of rows.
     */
    public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback)
    {
        $idField = $this->getIdFieldForLogTable($logTable);
        list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);

        // page through the table by id; each chunk starts after the last id seen
        $lastId = 0;
        do {
            $rows = Db::fetchAll($query, array_merge(array($lastId), $bind));
            if (!empty($rows)) {
                $lastId = $rows[count($rows) - 1][$idField];

                $callback($rows);
            }
        } while (count($rows) == $iterationStep);
    }

    /**
     * Deletes visits with the supplied IDs from log_visit. This method does not cascade, so rows in other tables w/
     * the same visit ID will still exist.
     *
     * @param int[] $idVisits
     * @return int The number of deleted rows.
     */
    public function deleteVisits($idVisits)
    {
        $sql = "DELETE FROM `" . Common::prefixTable('log_visit') . "` WHERE idvisit IN "
             . $this->getInFieldExpressionWithInts($idVisits);

        $statement = Db::query($sql);
        return $statement->rowCount();
    }

    /**
     * Deletes visit actions for the supplied visit IDs from log_link_visit_action.
     *
     * @param int[] $visitIds
     * @return int The number of deleted rows.
     */
    public function deleteVisitActionsForVisits($visitIds)
    {
        $sql = "DELETE FROM `" . Common::prefixTable('log_link_visit_action') . "` WHERE idvisit IN "
             . $this->getInFieldExpressionWithInts($visitIds);

        $statement = Db::query($sql);
        return $statement->rowCount();
    }

    /**
     * Deletes conversions for the supplied visit IDs from log_conversion. This method does not cascade, so
     * conversion items will not be deleted.
     *
     * @param int[] $visitIds
     * @return int The number of deleted rows.
     */
    public function deleteConversions($visitIds)
    {
        $sql = "DELETE FROM `" . Common::prefixTable('log_conversion') . "` WHERE idvisit IN "
             . $this->getInFieldExpressionWithInts($visitIds);

        $statement = Db::query($sql);
        return $statement->rowCount();
    }

    /**
     * Deletes conversion items for the supplied visit IDs from log_conversion_item.
     *
     * @param int[] $visitIds
     * @return int The number of deleted rows.
     */
    public function deleteConversionItems($visitIds)
    {
        $sql = "DELETE FROM `" . Common::prefixTable('log_conversion_item') . "` WHERE idvisit IN "
             . $this->getInFieldExpressionWithInts($visitIds);

        $statement = Db::query($sql);
        return $statement->rowCount();
    }

    /**
     * Deletes all unused entries from the log_action table. This method uses a temporary table to store used
     * actions, and then deletes rows from log_action that are not in this temporary table.
     *
     * Table locking is required to avoid concurrency issues.
     *
     * @throws \Exception If table locking permission is not granted to the current MySQL user.
     */
    public function deleteUnusedLogActions()
    {
        if (!Db::isLockPrivilegeGranted()) {
            throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error.");
        }

        // get current max ID in log tables w/ idaction references.
        $maxIds = $this->getMaxIdsInLogTables();

        $this->createTempTableForStoringUsedActions();

        // do large insert (inserting everything before maxIds) w/o locking tables...
        $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true);

        // ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
        $this->lockLogTables();
        $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false);

        // delete before unlocking tables so there's no chance a new log row that references an
        // unused action will be inserted.
        $this->deleteUnusedActions();
        Db::unlockAllTables();
    }

    /**
     * Returns true if the given site received at least one visit whose last action
     * time falls strictly between the two datetimes, false otherwise.
     *
     * @param string $fromDateTime Exclusive lower bound.
     * @param string $toDateTime Exclusive upper bound.
     * @param int $idSite
     * @return bool true if there are visits for this site between the given timeframe, false if not
     */
    public function hasSiteVisitsBetweenTimeframe($fromDateTime, $toDateTime, $idSite)
    {
        $sites = Db::fetchOne("SELECT 1
                FROM " . Common::prefixTable('log_visit') . "
                WHERE idsite = ?
                AND visit_last_action_time > ?
                AND visit_last_action_time < ?
                LIMIT 1", array($idSite, $fromDateTime, $toDateTime));

        return (bool) $sites;
    }

    /**
     * Builds the "col1 = ?, col2 = ?" portion of an UPDATE statement.
     *
     * @param array $columnsToSet Column names.
     * @return string
     */
    protected function getColumnSetExpressions(array $columnsToSet)
    {
        $columnsToSet = array_map(
            function ($column) {
                return $column . ' = ?';
            },
            $columnsToSet
        );

        return implode(', ', $columnsToSet);
    }

    /**
     * Executes an UPDATE, binding the column values followed by the visit ID.
     *
     * @param string $sql UPDATE statement with placeholders for $values then $idVisit.
     * @param array $values Column values, bound in order.
     * @param string|int $idVisit Bound as the final placeholder.
     * @return \Zend_Db_Statement
     * @throws \Exception
     */
    protected function update($sql, array $values, $idVisit)
    {
        return Db::query($sql, array_merge(array_values($values), array($idVisit)));
    }

    /**
     * Returns the primary/iteration id column name for a log table.
     *
     * @param string $logTable Unprefixed log table name.
     * @return string
     * @throws \InvalidArgumentException For unknown log tables.
     */
    private function getIdFieldForLogTable($logTable)
    {
        switch ($logTable) {
            case 'log_visit':
                return 'idvisit';
            case 'log_link_visit_action':
                return 'idlink_va';
            case 'log_conversion':
                return 'idvisit';
            case 'log_conversion_item':
                return 'idvisit';
            case 'log_action':
                return 'idaction';
            default:
                throw new \InvalidArgumentException("Unknown log table '$logTable'.");
        }
    }

    // TODO: instead of creating a log query like this, we should re-use segments. to do this, however, there must be a 1-1
    //       mapping for dimensions => segments, and each dimension should automatically have a segment.
    /**
     * Builds the paging SELECT used by forAllLogs(). The first bind value (the id
     * the page starts after) is prepended by the caller on each iteration.
     *
     * @return array array($sql, $bind)
     */
    private function createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep)
    {
        $bind = array();

        $sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE $idField > ?";

        foreach ($conditions as $condition) {
            list($column, $operator, $value) = $condition;

            if (is_array($value)) {
                // array operand translates to an IN (...) condition
                $sql .= " AND $column IN (" . Common::getSqlStringFieldsArray($value) . ")";

                $bind = array_merge($bind, $value);
            } else {
                $sql .= " AND $column $operator ?";

                $bind[] = $value;
            }
        }

        $sql .= " ORDER BY $idField ASC LIMIT " . (int)$iterationStep;

        return array($sql, $bind);
    }

    /**
     * Builds a "(1, 2, 3)" SQL list, casting each element to int so the values
     * can safely be embedded in a query.
     *
     * @param int[] $idVisits
     * @return string
     */
    private function getInFieldExpressionWithInts($idVisits)
    {
        $sql = "(";

        $isFirst = true;
        foreach ($idVisits as $idVisit) {
            if ($isFirst) {
                $isFirst = false;
            } else {
                $sql .= ', ';
            }

            $sql .= (int)$idVisit;
        }

        $sql .= ")";

        return $sql;
    }

    /**
     * Returns the current MAX of the id column for each log table that references
     * idaction columns, keyed by unprefixed table name.
     *
     * @return array
     */
    private function getMaxIdsInLogTables()
    {
        $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item');
        $idColumns = $this->getTableIdColumns();

        $result = array();
        foreach ($tables as $table) {
            $idCol = $idColumns[$table];

            $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
        }

        return $result;
    }

    /**
     * Creates the temporary table that collects the idaction values still in use.
     * Being TEMPORARY, it is private to this connection and dropped automatically.
     */
    private function createTempTableForStoringUsedActions()
    {
        $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME) . " (
                    idaction INT(11),
                    PRIMARY KEY (idaction)
                )";
        Db::query($sql);
    }

    // protected for testing purposes
    /**
     * Inserts the idaction values referenced by the log tables into the temp table.
     *
     * @param array $maxIds Per-table max ids captured by getMaxIdsInLogTables().
     * @param bool $olderThan When true, scans rows with id < the captured max (the
     *                        large, unlocked pass); when false, scans rows inserted
     *                        since then (the small pass run under table locks).
     * @param int $insertIntoTempIterationStep Chunk size for the segmented scan.
     */
    protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
    {
        $tempTableName = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);

        $idColumns = $this->getTableIdColumns();
        foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) {
            $idCol = $idColumns[$table];

            foreach ($columns as $col) {
                $select = "SELECT $col FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?";
                $sql = "INSERT IGNORE INTO $tempTableName $select";

                if ($olderThan) {
                    $start = 0;
                    $finish = $maxIds[$table];
                } else {
                    $start = $maxIds[$table];
                    $finish = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
                }

                Db::segmentedQuery($sql, $start, $finish, $insertIntoTempIterationStep);
            }
        }
    }

    /**
     * Read-locks the log tables that reference actions and write-locks log_action,
     * so no new action references can appear during the final purge pass.
     */
    private function lockLogTables()
    {
        Db::lockTables(
            $readLocks = Common::prefixTables('log_conversion',
                                             'log_link_visit_action',
                                             'log_visit',
                                             'log_conversion_item'),
            $writeLocks = Common::prefixTables('log_action')
        );
    }

    /**
     * Deletes every log_action row whose idaction was not collected into the temp
     * table (ie, is no longer referenced by any log table).
     */
    private function deleteUnusedActions()
    {
        list($logActionTable, $tempTableName) = Common::prefixTables("log_action",
                                                                    self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);

        $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable
                        FROM $logActionTable
                   LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction
                       WHERE tmp.idaction IS NULL";

        Db::query($deleteSql);
    }

    /**
     * Maps each purgeable log table to its id column name.
     *
     * @return string[]
     */
    private function getTableIdColumns()
    {
        return array(
            'log_link_visit_action' => 'idlink_va',
            'log_conversion'        => 'idvisit',
            'log_visit'             => 'idvisit',
            'log_conversion_item'   => 'idvisit'
        );
    }
}

View file

@ -0,0 +1,56 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\DataAccess;
use Piwik\Db;
/**
* Data Access Object that can be used to get metadata information about
* the MySQL tables Piwik uses.
*/
class TableMetadata
{
/**
* Returns the list of column names for a table.
*
* @param string $table Prefixed table name.
* @return string[] List of column names..
*/
/**
 * Returns the list of column names for a table.
 *
 * @param string $table Prefixed table name (backticks are stripped before quoting).
 * @return string[] List of column names.
 */
public function getColumns($table)
{
    $table = str_replace("`", "", $table);

    $rows = Db::fetchAll("SHOW COLUMNS FROM `" . $table . "`");

    // each SHOW COLUMNS row describes one column; its name lives in 'Field'
    return array_map(function ($row) {
        return $row['Field'];
    }, $rows);
}
/**
* Returns the list of idaction columns in a table. A column is
* assumed to be an idaction reference if it has `"idaction"` in its
* name (eg, `"idaction_url"` or `"idaction_content_name"`.
*
* @param string $table Prefixed table name.
* @return string[]
*/
/**
 * Returns the list of idaction columns in a table. A column is
 * assumed to be an idaction reference if it has `"idaction"` in its
 * name (eg, `"idaction_url"` or `"idaction_content_name"`).
 *
 * @param string $table Prefixed table name.
 * @return string[] Sequentially-indexed list of matching column names.
 */
public function getIdActionColumnNames($table)
{
    $result = array();

    foreach ($this->getColumns($table) as $columnName) {
        if (strpos($columnName, 'idaction') !== false) {
            $result[] = $columnName;
        }
    }

    return $result;
}
}