update Piwik to version 2.16 (fixes #91)

This commit is contained in:
oliver 2016-04-10 18:55:57 +02:00
commit d885a4baa9
5833 changed files with 418860 additions and 226988 deletions

View file

@ -0,0 +1,317 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\Archive;
use Piwik\Archive\ArchiveInvalidator\InvalidationResult;
use Piwik\CronArchive\SitesToReprocessDistributedList;
use Piwik\DataAccess\ArchiveTableCreator;
use Piwik\DataAccess\Model;
use Piwik\Date;
use Piwik\Option;
use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList;
use Piwik\Plugins\PrivacyManager\PrivacyManager;
use Piwik\Period;
use Piwik\Segment;
/**
* Service that can be used to invalidate archives or add archive references to a list so they will
* be invalidated later.
*
* Archives are put in an "invalidated" state by setting the done flag to `ArchiveWriter::DONE_INVALIDATED`.
* This class also adds the archive's associated site to the a distributed list and adding the archive's year month to another
* distributed list.
*
* CronArchive will reprocess the archive data for all sites in the first list, and a scheduled task
* will purge the old, invalidated data in archive tables identified by the second list.
*
* Until CronArchive, or browser triggered archiving, re-processes data for an invalidated archive, the invalidated
* archive data will still be displayed in the UI and API.
*
* ### Deferred Invalidation
*
* Invalidating archives means running queries on one or more archive tables. In some situations, like during
* tracking, this is not desired. In such cases, archive references can be added to a list via the
* rememberToInvalidateArchivedReportsLater method, which will add the reference to a distributed list
*
* Later, during Piwik's normal execution, the list will be read and every archive it references will
* be invalidated.
*/
class ArchiveInvalidator
{
private $rememberArchivedReportIdStart = 'report_to_invalidate_';
/**
* @var Model
*/
private $model;
public function __construct(Model $model)
{
$this->model = $model;
}
public function rememberToInvalidateArchivedReportsLater($idSite, Date $date)
{
$key = $this->buildRememberArchivedReportId($idSite, $date->toString());
$value = Option::get($key);
// we do not really have to get the value first. we could simply always try to call set() and it would update or
// insert the record if needed but we do not want to lock the table (especially since there are still some
// MyISAM installations)
if (false === $value) {
Option::set($key, '1');
}
}
public function getRememberedArchivedReportsThatShouldBeInvalidated()
{
$reports = Option::getLike($this->rememberArchivedReportIdStart . '%_%');
$sitesPerDay = array();
foreach ($reports as $report => $value) {
$report = str_replace($this->rememberArchivedReportIdStart, '', $report);
$report = explode('_', $report);
$siteId = (int) $report[0];
$date = $report[1];
if (empty($sitesPerDay[$date])) {
$sitesPerDay[$date] = array();
}
$sitesPerDay[$date][] = $siteId;
}
return $sitesPerDay;
}
private function buildRememberArchivedReportId($idSite, $date)
{
$id = $this->buildRememberArchivedReportIdForSite($idSite);
$id .= '_' . trim($date);
return $id;
}
private function buildRememberArchivedReportIdForSite($idSite)
{
return $this->rememberArchivedReportIdStart . (int) $idSite;
}
public function forgetRememberedArchivedReportsToInvalidateForSite($idSite)
{
$id = $this->buildRememberArchivedReportIdForSite($idSite) . '_%';
Option::deleteLike($id);
}
/**
* @internal
*/
public function forgetRememberedArchivedReportsToInvalidate($idSite, Date $date)
{
$id = $this->buildRememberArchivedReportId($idSite, $date->toString());
Option::delete($id);
}
/**
* @param $idSites int[]
* @param $dates Date[]
* @param $period string
* @param $segment Segment
* @param bool $cascadeDown
* @return InvalidationResult
* @throws \Exception
*/
public function markArchivesAsInvalidated(array $idSites, array $dates, $period, Segment $segment = null, $cascadeDown = false)
{
$invalidationInfo = new InvalidationResult();
$datesToInvalidate = $this->removeDatesThatHaveBeenPurged($dates, $invalidationInfo);
if (empty($period)) {
// if the period is empty, we don't need to cascade in any way, since we'll remove all periods
$periodDates = $this->getDatesByYearMonthAndPeriodType($dates);
} else {
$periods = $this->getPeriodsToInvalidate($datesToInvalidate, $period, $cascadeDown);
$periodDates = $this->getPeriodDatesByYearMonthAndPeriodType($periods);
}
$periodDates = $this->getUniqueDates($periodDates);
$this->markArchivesInvalidated($idSites, $periodDates, $segment);
$yearMonths = array_keys($periodDates);
$this->markInvalidatedArchivesForReprocessAndPurge($idSites, $yearMonths);
foreach ($idSites as $idSite) {
foreach ($dates as $date) {
$this->forgetRememberedArchivedReportsToInvalidate($idSite, $date);
}
}
return $invalidationInfo;
}
/**
* @param string[][][] $periodDates
* @return string[][][]
*/
private function getUniqueDates($periodDates)
{
$result = array();
foreach ($periodDates as $yearMonth => $periodsByYearMonth) {
foreach ($periodsByYearMonth as $periodType => $periods) {
$result[$yearMonth][$periodType] = array_unique($periods);
}
}
return $result;
}
/**
* @param Date[] $dates
* @param string $periodType
* @param bool $cascadeDown
* @return Period[]
*/
private function getPeriodsToInvalidate($dates, $periodType, $cascadeDown)
{
$periodsToInvalidate = array();
foreach ($dates as $date) {
if ($periodType == 'range') {
$date = $date . ',' . $date;
}
$period = Period\Factory::build($periodType, $date);
$periodsToInvalidate[] = $period;
if ($cascadeDown) {
$periodsToInvalidate = array_merge($periodsToInvalidate, $period->getAllOverlappingChildPeriods());
}
if ($periodType != 'year'
&& $periodType != 'range'
) {
$periodsToInvalidate[] = Period\Factory::build('year', $date);
}
}
return $periodsToInvalidate;
}
/**
* @param Period[] $periods
* @return string[][][]
*/
private function getPeriodDatesByYearMonthAndPeriodType($periods)
{
$result = array();
foreach ($periods as $period) {
$date = $period->getDateStart();
$periodType = $period->getId();
$yearMonth = ArchiveTableCreator::getTableMonthFromDate($date);
$result[$yearMonth][$periodType][] = $date->toString();
}
return $result;
}
/**
* Called when deleting all periods.
*
* @param Date[] $dates
* @return string[][][]
*/
private function getDatesByYearMonthAndPeriodType($dates)
{
$result = array();
foreach ($dates as $date) {
$yearMonth = ArchiveTableCreator::getTableMonthFromDate($date);
$result[$yearMonth][null][] = $date->toString();
// since we're removing all periods, we must make sure to remove year periods as well.
// this means we have to make sure the january table is processed.
$janYearMonth = $date->toString('Y') . '_01';
$result[$janYearMonth][null][] = $date->toString();
}
return $result;
}
/**
* @param int[] $idSites
* @param string[][][] $dates
* @throws \Exception
*/
private function markArchivesInvalidated($idSites, $dates, Segment $segment = null)
{
$archiveNumericTables = ArchiveTableCreator::getTablesArchivesInstalled($type = ArchiveTableCreator::NUMERIC_TABLE);
foreach ($archiveNumericTables as $table) {
$tableDate = ArchiveTableCreator::getDateFromTableName($table);
if (empty($dates[$tableDate])) {
continue;
}
$this->model->updateArchiveAsInvalidated($table, $idSites, $dates[$tableDate], $segment);
}
}
/**
* @param Date[] $dates
* @param InvalidationResult $invalidationInfo
* @return \Piwik\Date[]
*/
private function removeDatesThatHaveBeenPurged($dates, InvalidationResult $invalidationInfo)
{
$this->findOlderDateWithLogs($invalidationInfo);
$result = array();
foreach ($dates as $date) {
// we should only delete reports for dates that are more recent than N days
if ($invalidationInfo->minimumDateWithLogs
&& $date->isEarlier($invalidationInfo->minimumDateWithLogs)
) {
$invalidationInfo->warningDates[] = $date->toString();
continue;
}
$result[] = $date;
$invalidationInfo->processedDates[] = $date->toString();
}
return $result;
}
private function findOlderDateWithLogs(InvalidationResult $info)
{
// If using the feature "Delete logs older than N days"...
$purgeDataSettings = PrivacyManager::getPurgeDataSettings();
$logsDeletedWhenOlderThanDays = (int)$purgeDataSettings['delete_logs_older_than'];
$logsDeleteEnabled = $purgeDataSettings['delete_logs_enable'];
if ($logsDeleteEnabled
&& $logsDeletedWhenOlderThanDays
) {
$info->minimumDateWithLogs = Date::factory('today')->subDay($logsDeletedWhenOlderThanDays);
}
}
/**
* @param array $idSites
* @param array $yearMonths
*/
private function markInvalidatedArchivesForReprocessAndPurge(array $idSites, $yearMonths)
{
$store = new SitesToReprocessDistributedList();
$store->add($idSites);
$archivesToPurge = new ArchivesToPurgeDistributedList();
$archivesToPurge->add($yearMonths);
}
}

View file

@ -0,0 +1,56 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\Archive\ArchiveInvalidator;
use Piwik\Date;
/**
* Information about the result of an archive invalidation operation.
*/
class InvalidationResult
{
/**
* Dates that couldn't be invalidated because they are earlier than the configured log
* deletion limit.
*
* @var array
*/
public $warningDates = array();
/**
* Dates that were successfully invalidated.
*
* @var array
*/
public $processedDates = array();
/**
* The day of the oldest log entry.
*
* @var Date|bool
*/
public $minimumDateWithLogs = false;
/**
* @return string[]
*/
public function makeOutputLogs()
{
$output = array();
if ($this->warningDates) {
$output[] = 'Warning: the following Dates have not been invalidated, because they are earlier than your Log Deletion limit: ' .
implode(", ", $this->warningDates) .
"\n The last day with logs is " . $this->minimumDateWithLogs . ". " .
"\n Please disable 'Delete old Logs' or set it to a higher deletion threshold (eg. 180 days or 365 years).'.";
}
$output[] = "Success. The following dates were invalidated successfully: " . implode(", ", $this->processedDates);
return $output;
}
}

View file

@ -0,0 +1,272 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\Archive;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Config;
use Piwik\Container\StaticContainer;
use Piwik\DataAccess\ArchiveTableCreator;
use Piwik\DataAccess\Model;
use Piwik\Date;
use Piwik\Piwik;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
/**
* Service that purges temporary, error-ed, invalid and custom range archives from archive tables.
*
* Temporary archives are purged if they were archived before a specific time. The time is dependent
* on whether browser triggered archiving is enabled or not.
*
* Error-ed archives are purged w/o constraint.
*
* Invalid archives are purged if a new, valid, archive exists w/ the same site, date, period combination.
* Archives are marked as invalid via Piwik\Archive\ArchiveInvalidator.
*/
class ArchivePurger
{
/**
* @var Model
*/
private $model;
/**
* Date threshold for purging custom range archives. Archives that are older than this date
* are purged unconditionally from the requested archive table.
*
* @var Date
*/
private $purgeCustomRangesOlderThan;
/**
* Date to use for 'yesterday'. Exists so tests can override this value.
*
* @var Date
*/
private $yesterday;
/**
* Date to use for 'today'. Exists so tests can override this value.
*
* @var $today
*/
private $today;
/**
* Date to use for 'now'. Exists so tests can override this value.
*
* @var int
*/
private $now;
/**
* @var LoggerInterface
*/
private $logger;
public function __construct(Model $model = null, Date $purgeCustomRangesOlderThan = null, LoggerInterface $logger = null)
{
$this->model = $model ?: new Model();
$this->purgeCustomRangesOlderThan = $purgeCustomRangesOlderThan ?: self::getDefaultCustomRangeToPurgeAgeThreshold();
$this->yesterday = Date::factory('yesterday');
$this->today = Date::factory('today');
$this->now = time();
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
}
/**
* Purge all invalidate archives for whom there are newer, valid archives from the archive
* table that stores data for `$date`.
*
* @param Date $date The date identifying the archive table.
* @return int The total number of archive rows deleted (from both the blog & numeric tables).
*/
public function purgeInvalidatedArchivesFrom(Date $date)
{
$numericTable = ArchiveTableCreator::getNumericTable($date);
// we don't want to do an INNER JOIN on every row in a archive table that can potentially have tens to hundreds of thousands of rows,
// so we first look for sites w/ invalidated archives, and use this as a constraint in getInvalidatedArchiveIdsSafeToDelete() below.
// the constraint will hit an INDEX and speed up the inner join that happens in getInvalidatedArchiveIdsSafeToDelete().
$idSites = $this->model->getSitesWithInvalidatedArchive($numericTable);
if (empty($idSites)) {
$this->logger->debug("No sites with invalidated archives found in {table}.", array('table' => $numericTable));
return 0;
}
$archiveIds = $this->model->getInvalidatedArchiveIdsSafeToDelete($numericTable, $idSites);
if (empty($archiveIds)) {
$this->logger->debug("No invalidated archives found in {table} with newer, valid archives.", array('table' => $numericTable));
return 0;
}
$this->logger->info("Found {countArchiveIds} invalidated archives safe to delete in {table}.", array(
'table' => $numericTable, 'countArchiveIds' => count($archiveIds)
));
$deletedRowCount = $this->deleteArchiveIds($date, $archiveIds);
$this->logger->debug("Deleted {count} rows in {table} and its associated blob table.", array(
'table' => $numericTable, 'count' => $deletedRowCount
));
return $deletedRowCount;
}
/**
* Removes the outdated archives for the given month.
* (meaning they are marked with a done flag of ArchiveWriter::DONE_OK_TEMPORARY or ArchiveWriter::DONE_ERROR)
*
* @param Date $dateStart Only the month will be used
* @return int Returns the total number of rows deleted.
*/
public function purgeOutdatedArchives(Date $dateStart)
{
$purgeArchivesOlderThan = $this->getOldestTemporaryArchiveToKeepThreshold();
$deletedRowCount = 0;
$idArchivesToDelete = $this->getOutdatedArchiveIds($dateStart, $purgeArchivesOlderThan);
if (!empty($idArchivesToDelete)) {
$deletedRowCount = $this->deleteArchiveIds($dateStart, $idArchivesToDelete);
$this->logger->info("Deleted {count} rows in archive tables (numeric + blob) for {date}.", array(
'count' => $deletedRowCount,
'date' => $dateStart
));
} else {
$this->logger->debug("No outdated archives found in archive numeric table for {date}.", array('date' => $dateStart));
}
$this->logger->debug("Purging temporary archives: done [ purged archives older than {date} in {yearMonth} ] [Deleted IDs: {deletedIds}]", array(
'date' => $purgeArchivesOlderThan,
'yearMonth' => $dateStart->toString('Y-m'),
'deletedIds' => implode(',', $idArchivesToDelete)
));
return $deletedRowCount;
}
protected function getOutdatedArchiveIds(Date $date, $purgeArchivesOlderThan)
{
$archiveTable = ArchiveTableCreator::getNumericTable($date);
$result = $this->model->getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan);
$idArchivesToDelete = array();
if (!empty($result)) {
foreach ($result as $row) {
$idArchivesToDelete[] = $row['idarchive'];
}
}
return $idArchivesToDelete;
}
/**
* Deleting "Custom Date Range" reports after 1 day, since they can be re-processed and would take up un-necessary space.
*
* @param $date Date
* @return int The total number of rows deleted from both the numeric & blob table.
*/
public function purgeArchivesWithPeriodRange(Date $date)
{
$numericTable = ArchiveTableCreator::getNumericTable($date);
$blobTable = ArchiveTableCreator::getBlobTable($date);
$deletedCount = $this->model->deleteArchivesWithPeriod(
$numericTable, $blobTable, Piwik::$idPeriods['range'], $this->purgeCustomRangesOlderThan);
$level = $deletedCount == 0 ? LogLevel::DEBUG : LogLevel::INFO;
$this->logger->log($level, "Purged {count} range archive rows from {numericTable} & {blobTable}.", array(
'count' => $deletedCount,
'numericTable' => $numericTable,
'blobTable' => $blobTable
));
$this->logger->debug(" [ purged archives older than {threshold} ]", array('threshold' => $this->purgeCustomRangesOlderThan));
return $deletedCount;
}
/**
* Deletes by batches Archive IDs in the specified month,
*
* @param Date $date
* @param $idArchivesToDelete
* @return int Number of rows deleted from both numeric + blob table.
*/
protected function deleteArchiveIds(Date $date, $idArchivesToDelete)
{
$batches = array_chunk($idArchivesToDelete, 1000);
$numericTable = ArchiveTableCreator::getNumericTable($date);
$blobTable = ArchiveTableCreator::getBlobTable($date);
$deletedCount = 0;
foreach ($batches as $idsToDelete) {
$deletedCount += $this->model->deleteArchiveIds($numericTable, $blobTable, $idsToDelete);
}
return $deletedCount;
}
/**
* Returns a timestamp indicating outdated archives older than this timestamp (processed before) can be purged.
*
* @return int|bool Outdated archives older than this timestamp should be purged
*/
protected function getOldestTemporaryArchiveToKeepThreshold()
{
$temporaryArchivingTimeout = Rules::getTodayArchiveTimeToLive();
if (Rules::isBrowserTriggerEnabled()) {
// If Browser Archiving is enabled, it is likely there are many more temporary archives
// We delete more often which is safe, since reports are re-processed on demand
return Date::factory($this->now - 2 * $temporaryArchivingTimeout)->getDateTime();
}
// If cron core:archive command is building the reports, we should keep all temporary reports from today
return $this->yesterday->getDateTime();
}
private static function getDefaultCustomRangeToPurgeAgeThreshold()
{
$daysRangesValid = Config::getInstance()->General['purge_date_range_archives_after_X_days'];
return Date::factory('today')->subDay($daysRangesValid)->getDateTime();
}
/**
* For tests.
*
* @param Date $yesterday
*/
public function setYesterdayDate(Date $yesterday)
{
$this->yesterday = $yesterday;
}
/**
* For tests.
*
* @param Date $today
*/
public function setTodayDate(Date $today)
{
$this->today = $today;
}
/**
* For tests.
*
* @param int $now
*/
public function setNow($now)
{
$this->now = $now;
}
}

View file

@ -0,0 +1,144 @@
<?php
/**
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik\Archive;
use Piwik\DataTable;
/**
* This class is used to split blobs of DataTables into chunks. Each blob used to be stored under one blob in the
* archive table. For better efficiency we do now combine multiple DataTable into one blob entry.
*
* Chunks are identified by having the recordName $recordName_chunk_0_99, $recordName_chunk_100_199 (this chunk stores
* the subtable 100-199).
*/
class Chunk
{
const ARCHIVE_APPENDIX_SUBTABLES = 'chunk';
const NUM_TABLES_IN_CHUNK = 100;
/**
* Get's the record name to use for a given tableId/subtableId.
*
* @param string $recordName eg 'Actions_ActionsUrl'
* @param int $tableId eg '5' for tableId '5'
* @return string eg 'Actions_ActionsUrl_chunk_0_99' as the table should be stored under this blob id.
*/
public function getRecordNameForTableId($recordName, $tableId)
{
$chunk = (floor($tableId / self::NUM_TABLES_IN_CHUNK));
$start = $chunk * self::NUM_TABLES_IN_CHUNK;
$end = $start + self::NUM_TABLES_IN_CHUNK - 1;
return $recordName . $this->getAppendix() . $start . '_' . $end;
}
/**
* Moves the given blobs into chunks and assigns a proper record name containing the chunk number.
*
* @param string $recordName The original archive record name, eg 'Actions_ActionsUrl'
* @param array $blobs An array containg a mapping of tableIds to blobs. Eg array(0 => 'blob', 1 => 'subtableBlob', ...)
* @return array An array where each blob is moved into a chunk, indexed by recordNames.
* eg array('Actions_ActionsUrl_chunk_0_99' => array(0 => 'blob', 1 => 'subtableBlob', ...),
* 'Actions_ActionsUrl_chunk_100_199' => array(...))
*/
public function moveArchiveBlobsIntoChunks($recordName, $blobs)
{
$chunks = array();
foreach ($blobs as $tableId => $blob) {
$name = $this->getRecordNameForTableId($recordName, $tableId);
if (!array_key_exists($name, $chunks)) {
$chunks[$name] = array();
}
$chunks[$name][$tableId] = $blob;
}
return $chunks;
}
/**
* Detects whether a recordName like 'Actions_ActionUrls_chunk_0_99' or 'Actions_ActionUrls' belongs to a
* chunk or not.
*
* To be a valid recordName that belongs to a chunk it must end with '_chunk_NUMERIC_NUMERIC'.
*
* @param string $recordName
* @return bool
*/
public function isRecordNameAChunk($recordName)
{
$posAppendix = $this->getEndPosOfChunkAppendix($recordName);
if (false === $posAppendix) {
return false;
}
// will contain "0_99" of "chunk_0_99"
$blobId = substr($recordName, $posAppendix);
return $this->isChunkRange($blobId);
}
private function isChunkRange($blobId)
{
$blobId = explode('_', $blobId);
return 2 === count($blobId) && is_numeric($blobId[0]) && is_numeric($blobId[1]);
}
/**
* When having a record like 'Actions_ActionUrls_chunk_0_99" it will return the raw recordName 'Actions_ActionUrls'.
*
* @param string $recordName
* @return string
*/
public function getRecordNameWithoutChunkAppendix($recordName)
{
if (!$this->isRecordNameAChunk($recordName)) {
return $recordName;
}
$posAppendix = $this->getStartPosOfChunkAppendix($recordName);
if (false === $posAppendix) {
return $recordName;
}
return substr($recordName, 0, $posAppendix);
}
/**
* Returns the string that is appended to the original record name. This appendix identifes a record name is a
* chunk.
* @return string
*/
public function getAppendix()
{
return '_' . self::ARCHIVE_APPENDIX_SUBTABLES . '_';
}
private function getStartPosOfChunkAppendix($recordName)
{
return strpos($recordName, $this->getAppendix());
}
private function getEndPosOfChunkAppendix($recordName)
{
$pos = strpos($recordName, $this->getAppendix());
if ($pos === false) {
return false;
}
return $pos + strlen($this->getAppendix());
}
}

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -137,6 +137,21 @@ class DataCollection
return $this->data[$idSite][$period];
}
/**
* Set data for a specific site & period. If there is no data for the given site ID & period,
* it is set to the default row.
*
* @param int $idSite
* @param string $period eg, '2012-01-01,2012-01-31'
* @param string $name eg 'nb_visits'
* @param string $value eg 5
*/
public function set($idSite, $period, $name, $value)
{
$row = & $this->get($idSite, $period);
$row[$name] = $value;
}
/**
* Adds a new metadata to the data for specific site & period. If there is no
* data for the given site ID & period, it is set to the default row.
@ -188,6 +203,7 @@ class DataCollection
$this->putRowInIndex($result, $indexKeys, $row, $idSite, $period);
}
}
return $result;
}
@ -208,9 +224,27 @@ class DataCollection
$this->dataNames, $this->dataType, $this->sitesId, $this->periods, $this->defaultRow);
$index = $this->getIndexedArray($resultIndices);
return $dataTableFactory->make($index, $resultIndices);
}
/**
* See {@link DataTableFactory::makeMerged()}
*
* @param array $resultIndices
* @return DataTable|DataTable\Map
* @throws Exception
*/
public function getMergedDataTable($resultIndices)
{
$dataTableFactory = new DataTableFactory(
$this->dataNames, $this->dataType, $this->sitesId, $this->periods, $this->defaultRow);
$index = $this->getIndexedArray($resultIndices);
return $dataTableFactory->makeMerged($index, $resultIndices);
}
/**
* Returns archive data as a DataTable indexed by metadata. Indexed data will
* be represented by Map instances. Each DataTable will have
@ -249,6 +283,7 @@ class DataCollection
$dataTableFactory->useSubtable($idSubTable);
$index = $this->getIndexedArray($resultIndices);
return $dataTableFactory->make($index, $resultIndices);
}
@ -296,12 +331,16 @@ class DataCollection
if ($metadataName == DataTableFactory::TABLE_METADATA_SITE_INDEX) {
$indexKeyValues = array_values($this->sitesId);
} else if ($metadataName == DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
} elseif ($metadataName == DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
$indexKeyValues = array_keys($this->periods);
}
foreach ($indexKeyValues as $key) {
$result[$key] = $this->createOrderedIndex($metadataNamesToIndexBy);
if (empty($metadataNamesToIndexBy)) {
$result = array_fill_keys($indexKeyValues, array());
} else {
foreach ($indexKeyValues as $key) {
$result[$key] = $this->createOrderedIndex($metadataNamesToIndexBy);
}
}
}
@ -318,7 +357,7 @@ class DataCollection
foreach ($metadataNamesToIndexBy as $metadataName) {
if ($metadataName == DataTableFactory::TABLE_METADATA_SITE_INDEX) {
$key = $idSite;
} else if ($metadataName == DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
} elseif ($metadataName == DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
$key = $period;
} else {
$key = $row[self::METADATA_CONTAINER_ROW_KEY][$metadataName];

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -95,6 +95,23 @@ class DataTableFactory
$this->defaultRow = $defaultRow;
}
/**
* Returns the ID of the site a table is related to based on the 'site' metadata entry,
* or null if there is none.
*
* @param DataTable $table
* @return int|null
*/
public static function getSiteIdFromMetadata(DataTable $table)
{
$site = $table->getMetadata('site');
if (empty($site)) {
return null;
} else {
return $site->getId();
}
}
/**
* Tells the factory instance to expand the DataTables that are created by
* creating subtables and setting the subtable IDs of rows w/ subtables correctly.
@ -128,6 +145,11 @@ class DataTableFactory
$this->idSubtable = $idSubtable;
}
private function isNumericDataType()
{
return $this->dataType == 'numeric';
}
/**
* Creates a DataTable|Set instance using an index of
* archive data.
@ -139,21 +161,63 @@ class DataTableFactory
*/
public function make($index, $resultIndices)
{
$keyMetadata = $this->getDefaultMetadata();
if (empty($resultIndices)) {
// for numeric data, if there's no index (and thus only 1 site & period in the query),
// we want to display every queried metric name
if (empty($index)
&& $this->dataType == 'numeric'
&& $this->isNumericDataType()
) {
$index = $this->defaultRow;
}
$dataTable = $this->createDataTable($index, $keyMetadata = array());
$dataTable = $this->createDataTable($index, $keyMetadata);
} else {
$dataTable = $this->createDataTableMapFromIndex($index, $resultIndices, $keyMetadata = array());
$dataTable = $this->createDataTableMapFromIndex($index, $resultIndices, $keyMetadata);
}
return $dataTable;
}
/**
* Creates a merged DataTable|Map instance using an index of archive data similar to {@link make()}.
*
* Whereas {@link make()} creates a Map for each result index (period and|or site), this will only create a Map
* for a period result index and move all site related indices into one dataTable. This is the same as doing
* `$dataTableFactory->make()->mergeChildren()` just much faster. It is mainly useful for reports across many sites
* eg `MultiSites.getAll`. Was done as part of https://github.com/piwik/piwik/issues/6809
*
* @param array $index @see DataCollection
* @param array $resultIndices an array mapping metadata names with pretty metadata labels.
*
* @return DataTable|DataTable\Map
* @throws \Exception
*/
public function makeMerged($index, $resultIndices)
{
if (!$this->isNumericDataType()) {
throw new \Exception('This method is supposed to work with non-numeric data types but it is not tested. To use it, remove this exception and write tests to be sure it works.');
}
$hasSiteIndex = isset($resultIndices[self::TABLE_METADATA_SITE_INDEX]);
$hasPeriodIndex = isset($resultIndices[self::TABLE_METADATA_PERIOD_INDEX]);
$isNumeric = $this->isNumericDataType();
// to be backwards compatible use a Simple table if needed as it will be formatted differently
$useSimpleDataTable = !$hasSiteIndex && $isNumeric;
if (!$hasSiteIndex) {
$firstIdSite = reset($this->sitesId);
$index = array($firstIdSite => $index);
}
if ($hasPeriodIndex) {
$dataTable = $this->makeMergedTableWithPeriodAndSiteIndex($index, $resultIndices, $useSimpleDataTable, $isNumeric);
} else {
$dataTable = $this->makeMergedWithSiteIndex($index, $useSimpleDataTable, $isNumeric);
}
$this->transformMetadata($dataTable);
return $dataTable;
}
@ -171,16 +235,16 @@ class DataTableFactory
* @param array $blobRow
* @return DataTable|DataTable\Map
*/
private function makeFromBlobRow($blobRow)
private function makeFromBlobRow($blobRow, $keyMetadata)
{
if ($blobRow === false) {
return new DataTable();
}
if (count($this->dataNames) === 1) {
return $this->makeDataTableFromSingleBlob($blobRow);
return $this->makeDataTableFromSingleBlob($blobRow, $keyMetadata);
} else {
return $this->makeIndexedByRecordNameDataTable($blobRow);
return $this->makeIndexedByRecordNameDataTable($blobRow, $keyMetadata);
}
}
@ -192,7 +256,7 @@ class DataTableFactory
* @param array $blobRow
* @return DataTable
*/
private function makeDataTableFromSingleBlob($blobRow)
private function makeDataTableFromSingleBlob($blobRow, $keyMetadata)
{
$recordName = reset($this->dataNames);
if ($this->idSubtable !== null) {
@ -206,7 +270,7 @@ class DataTableFactory
}
// set table metadata
$table->setMetadataValues(DataCollection::getDataRowMetadata($blobRow));
$table->setAllTableMetadata(array_merge(DataCollection::getDataRowMetadata($blobRow), $keyMetadata));
if ($this->expandDataTable) {
$table->enableRecursiveFilters();
@ -223,12 +287,12 @@ class DataTableFactory
* @param array $blobRow
* @return DataTable\Map
*/
private function makeIndexedByRecordNameDataTable($blobRow)
private function makeIndexedByRecordNameDataTable($blobRow, $keyMetadata)
{
$table = new DataTable\Map();
$table->setKeyName('recordName');
$tableMetadata = DataCollection::getDataRowMetadata($blobRow);
$tableMetadata = array_merge(DataCollection::getDataRowMetadata($blobRow), $keyMetadata);
foreach ($blobRow as $name => $blob) {
$newTable = DataTable::fromSerializedArray($blob);
@ -248,23 +312,23 @@ class DataTableFactory
* @param array $keyMetadata The metadata to add to the table when it's created.
* @return DataTable\Map
*/
private function createDataTableMapFromIndex($index, $resultIndices, $keyMetadata = array())
private function createDataTableMapFromIndex($index, $resultIndices, $keyMetadata)
{
$resultIndexLabel = reset($resultIndices);
$result = new DataTable\Map();
$result->setKeyName(reset($resultIndices));
$resultIndex = key($resultIndices);
array_shift($resultIndices);
$result = new DataTable\Map();
$result->setKeyName($resultIndexLabel);
$hasIndices = !empty($resultIndices);
foreach ($index as $label => $value) {
$keyMetadata[$resultIndex] = $label;
$keyMetadata[$resultIndex] = $this->createTableIndexMetadata($resultIndex, $label);
if (empty($resultIndices)) {
$newTable = $this->createDataTable($value, $keyMetadata);
} else {
if ($hasIndices) {
$newTable = $this->createDataTableMapFromIndex($value, $resultIndices, $keyMetadata);
} else {
$newTable = $this->createDataTable($value, $keyMetadata);
}
$result->addTable($newTable, $this->prettifyIndexLabel($resultIndex, $label));
@ -273,6 +337,15 @@ class DataTableFactory
return $result;
}
private function createTableIndexMetadata($resultIndex, $label)
{
if ($resultIndex === DataTableFactory::TABLE_METADATA_SITE_INDEX) {
return new Site($label);
} elseif ($resultIndex === DataTableFactory::TABLE_METADATA_PERIOD_INDEX) {
return $this->periods[$label];
}
}
/**
* Creates a DataTable instance from an index row.
*
@ -283,11 +356,11 @@ class DataTableFactory
private function createDataTable($data, $keyMetadata)
{
if ($this->dataType == 'blob') {
$result = $this->makeFromBlobRow($data);
$result = $this->makeFromBlobRow($data, $keyMetadata);
} else {
$result = $this->makeFromMetricsArray($data);
$result = $this->makeFromMetricsArray($data, $keyMetadata);
}
$this->setTableMetadata($keyMetadata, $result);
return $result;
}
@ -307,7 +380,7 @@ class DataTableFactory
&& $treeLevel >= $this->maxSubtableDepth
) {
// unset the subtables so DataTableManager doesn't throw
foreach ($dataTable->getRows() as $row) {
foreach ($dataTable->getRowsWithoutSummaryRow() as $row) {
$row->removeSubtable();
}
@ -316,7 +389,7 @@ class DataTableFactory
$dataName = reset($this->dataNames);
foreach ($dataTable->getRows() as $row) {
foreach ($dataTable->getRowsWithoutSummaryRow() as $row) {
$sid = $row->getIdSubDataTable();
if ($sid === null) {
continue;
@ -340,17 +413,12 @@ class DataTableFactory
}
}
/**
* Converts site IDs and period string ranges into Site instances and
* Period instances in DataTable metadata.
*/
private function transformMetadata($table)
private function getDefaultMetadata()
{
$periods = $this->periods;
$table->filter(function ($table) use ($periods) {
$table->setMetadata(DataTableFactory::TABLE_METADATA_SITE_INDEX, new Site($table->getMetadata(DataTableFactory::TABLE_METADATA_SITE_INDEX)));
$table->setMetadata(DataTableFactory::TABLE_METADATA_PERIOD_INDEX, $periods[$table->getMetadata(DataTableFactory::TABLE_METADATA_PERIOD_INDEX)]);
});
return array(
DataTableFactory::TABLE_METADATA_SITE_INDEX => new Site(reset($this->sitesId)),
DataTableFactory::TABLE_METADATA_PERIOD_INDEX => reset($this->periods),
);
}
/**
@ -368,39 +436,16 @@ class DataTableFactory
return $label;
}
/**
* @param $keyMetadata
* @param $result
*/
private function setTableMetadata($keyMetadata, $result)
{
if (!isset($keyMetadata[DataTableFactory::TABLE_METADATA_SITE_INDEX])) {
$keyMetadata[DataTableFactory::TABLE_METADATA_SITE_INDEX] = reset($this->sitesId);
}
if (!isset($keyMetadata[DataTableFactory::TABLE_METADATA_PERIOD_INDEX])) {
reset($this->periods);
$keyMetadata[DataTableFactory::TABLE_METADATA_PERIOD_INDEX] = key($this->periods);
}
// Note: $result can be a DataTable\Map
$result->filter(function ($table) use ($keyMetadata) {
foreach ($keyMetadata as $name => $value) {
$table->setMetadata($name, $value);
}
});
}
/**
* @param $data
* @return DataTable\Simple
*/
private function makeFromMetricsArray($data)
private function makeFromMetricsArray($data, $keyMetadata)
{
$table = new DataTable\Simple();
if (!empty($data)) {
$table->setAllTableMetadata(DataCollection::getDataRowMetadata($data));
$table->setAllTableMetadata(array_merge(DataCollection::getDataRowMetadata($data), $keyMetadata));
DataCollection::removeMetadataFromDataRow($data);
@ -412,15 +457,89 @@ class DataTableFactory
// w/o this code, an empty array would be created, and other parts of Piwik
// would break.
if (count($this->dataNames) == 1
&& $this->dataType == 'numeric'
&& $this->isNumericDataType()
) {
$name = reset($this->dataNames);
$table->addRow(new Row(array(Row::COLUMNS => array($name => 0))));
}
$table->setAllTableMetadata($keyMetadata);
}
$result = $table;
return $result;
}
}
private function makeMergedTableWithPeriodAndSiteIndex($index, $resultIndices, $useSimpleDataTable, $isNumeric)
{
$map = new DataTable\Map();
$map->setKeyName($resultIndices[self::TABLE_METADATA_PERIOD_INDEX]);
// we save all tables of the map in this array to be able to add rows fast
$tables = array();
foreach ($this->periods as $range => $period) {
// as the resulting table is "merged", we do only set Period metedata and no metadata for site. Instead each
// row will have an idsite metadata entry.
$metadata = array(self::TABLE_METADATA_PERIOD_INDEX => $period);
if ($useSimpleDataTable) {
$table = new DataTable\Simple();
} else {
$table = new DataTable();
}
$table->setAllTableMetadata($metadata);
$map->addTable($table, $this->prettifyIndexLabel(self::TABLE_METADATA_PERIOD_INDEX, $range));
$tables[$range] = $table;
}
foreach ($index as $idsite => $table) {
$rowMeta = array('idsite' => $idsite);
foreach ($table as $range => $row) {
if (!empty($row)) {
$tables[$range]->addRow(new Row(array(
Row::COLUMNS => $row,
Row::METADATA => $rowMeta)
));
} elseif ($isNumeric) {
$tables[$range]->addRow(new Row(array(
Row::COLUMNS => $this->defaultRow,
Row::METADATA => $rowMeta)
));
}
}
}
return $map;
}
private function makeMergedWithSiteIndex($index, $useSimpleDataTable, $isNumeric)
{
if ($useSimpleDataTable) {
$table = new DataTable\Simple();
} else {
$table = new DataTable();
}
$table->setAllTableMetadata(array(DataTableFactory::TABLE_METADATA_PERIOD_INDEX => reset($this->periods)));
foreach ($index as $idsite => $row) {
if (!empty($row)) {
$table->addRow(new Row(array(
Row::COLUMNS => $row,
Row::METADATA => array('idsite' => $idsite))
));
} elseif ($isNumeric) {
$table->addRow(new Row(array(
Row::COLUMNS => $this->defaultRow,
Row::METADATA => array('idsite' => $idsite))
));
}
}
return $table;
}
}

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -9,7 +9,6 @@
namespace Piwik\Archive;
use Exception;
use Piwik\Period;
use Piwik\Segment;
@ -36,22 +35,16 @@ class Parameters
*/
private $segment;
/**
* @var bool
*/
private $skipAggregationOfSubTables;
public function getSegment()
{
return $this->segment;
}
public function __construct($idSites, $periods, Segment $segment, $skipAggregationOfSubTables)
public function __construct($idSites, $periods, Segment $segment)
{
$this->idSites = $idSites;
$this->periods = $periods;
$this->segment = $segment;
$this->skipAggregationOfSubTables = $skipAggregationOfSubTables;
}
public function getPeriods()
@ -63,11 +56,4 @@ class Parameters
{
return $this->idSites;
}
public function isSkipAggregationOfSubTables()
{
return $this->skipAggregationOfSubTables;
}
}