update Piwik to version 2.16 (fixes #91)

This commit is contained in:
oliver 2016-04-10 18:55:57 +02:00
commit d885a4baa9
5833 changed files with 418860 additions and 226988 deletions

View file

@ -0,0 +1,8 @@
# Deny web access to every file in this directory.
<Files "*">
# Apache older than 2.4: legacy Order/Allow/Deny style access control.
<IfVersion < 2.4>
Deny from all
</IfVersion>
# Apache 2.4 and newer: Require-based access control.
<IfVersion >= 2.4>
Require all denied
</IfVersion>
</Files>

View file

@ -1,7 +1,8 @@
<html>
<head>
<meta http-equiv="refresh" content="0;url=http://piwik.org/docs/installation/"/>
<meta http-equiv="refresh" content="0;url=https://piwik.org/docs/installation/"/>
<meta name="robots" content="noindex,nofollow">
</head>
<body>You will be redirected to the Piwik Installation documentation on <a href='http://piwik.org/docs/installation/'>http://piwik.org/docs/installation/</a>
<body>You will be redirected to the Piwik Installation documentation on <a href='https://piwik.org/docs/installation/'>https://piwik.org/docs/installation/</a>
</body>
</html>

View file

@ -0,0 +1,53 @@
#!/bin/bash
# Builds the xhprof PHP extension found in vendor/facebook/xhprof/extension.
#
# Must be run from the project root (all paths below are relative to it).
# The output of every build step is written to tmp/xhprof-logs/<step>.log.
#
# Exit status:
#   0  extension built, or build skipped because a prerequisite is missing
#   1  could not prepare directories, or phpize / aclocal / autoconf failed
#   2  ./configure failed
#   3  make failed

# Missing prerequisites are not treated as errors: the build is simply skipped.
if ! type phpize &> /dev/null; then
    echo "phpize missing, skipping build"
    echo "If you installed PHP via Aptitude, you can install phpize w/ 'sudo apt-get install php5-dev'"
    exit
fi

if ! type make &> /dev/null; then
    echo "make missing, skipping build"
    exit
fi

if ! [ -d "vendor/facebook/xhprof/extension" ]; then
    echo "xhprof missing, skipping build"
    exit
fi

# Every step below redirects its output into this directory, so stop early if it
# cannot be created instead of reporting a misleading "view the log" error later.
if ! mkdir -p tmp/xhprof-logs; then
    echo "Fatal error: could not create tmp/xhprof-logs."
    exit 1
fi

# Never run phpize/configure/make from the wrong directory.
if ! cd vendor/facebook/xhprof/extension; then
    echo "Fatal error: could not change into vendor/facebook/xhprof/extension."
    exit 1
fi

echo "Building xhprof..."

# The log paths are relative to the extension directory, four levels below the root.
if ! phpize &> ../../../../tmp/xhprof-logs/phpize.log; then
    echo "Fatal error: phpize failed! View tmp/xhprof-logs/phpize.log for more info."
    exit 1
fi

# Execute aclocal and autoconf only if Gentoo is used.
if [[ -x /usr/bin/emerge ]]; then
    if ! aclocal &> ../../../../tmp/xhprof-logs/aclocal.log; then
        echo "Fatal error: aclocal failed! View tmp/xhprof-logs/aclocal.log for more info."
        exit 1
    fi

    if ! autoconf &> ../../../../tmp/xhprof-logs/autoconf.log; then
        echo "Fatal error: autoconf failed! View tmp/xhprof-logs/autoconf.log for more info."
        exit 1
    fi
fi

if ! ./configure &> ../../../../tmp/xhprof-logs/configure.log; then
    echo "Fatal error: configure script failed! View tmp/xhprof-logs/configure.log for more info."
    exit 2
fi

if ! make &> ../../../../tmp/xhprof-logs/make.log; then
    echo "Fatal error: could not build extension (make failed)! View tmp/xhprof-logs/make.log for more info."
    exit 3
fi

echo "Done."

View file

@ -0,0 +1,12 @@
#!/bin/bash
# if xhprof exists but points to a non-master branch, checkout master so composer won't fail
#
# Must be run from the project root (the path below is relative to it).
if [ -d "vendor/facebook/xhprof/extension" ]; then
    # Only touch git state after the directory change succeeded: running
    # `git reset --hard` from the project root would discard local changes there.
    if cd vendor/facebook/xhprof/extension; then
        git reset --hard &> /dev/null
        git checkout master &> /dev/null
    fi
fi

View file

@ -0,0 +1,5 @@
# Allow direct web access to Web cron
<Files "archive.php">
# Pick the access-control syntax that matches the running Apache version.
<IfModule mod_version.c>
<IfVersion < 2.4>
Order Allow,Deny
Allow from all
</IfVersion>
<IfVersion >= 2.4>
Require all granted
</IfVersion>
</IfModule>
# Without mod_version the server version cannot be tested: keep the legacy directives.
<IfModule !mod_version.c>
Order Allow,Deny
Allow from all
</IfModule>
</Files>

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -17,12 +17,10 @@ if (!defined('PIWIK_USER_PATH')) {
define('PIWIK_USER_PATH', PIWIK_INCLUDE_PATH);
}
if (!class_exists('Piwik\Console', false)) {
define('PIWIK_ENABLE_DISPATCH', false);
define('PIWIK_ENABLE_ERROR_HANDLER', false);
define('PIWIK_ENABLE_SESSION_START', false);
require_once PIWIK_INCLUDE_PATH . "/index.php";
}
define('PIWIK_ENABLE_ERROR_HANDLER', false);
define('PIWIK_ENABLE_SESSION_START', false);
require_once PIWIK_INCLUDE_PATH . '/core/Common.php';
if (!empty($_SERVER['argv'][0])) {
$callee = $_SERVER['argv'][0];
@ -35,17 +33,43 @@ if (false !== strpos($callee, 'archive.php')) {
echo "
-------------------------------------------------------
Using this 'archive.php' script is no longer recommended.
Please use '/path/to/php $piwikHome/console core:archive " . implode(' ', array_slice($_SERVER['argv'], 1)) . "' instead.
Please use '/path/to/php $piwikHome/console core:archive " . implode('', array_slice($_SERVER['argv'], 1)) . "' instead.
To get help use '/path/to/php $piwikHome/console core:archive --help'
See also: http://piwik.org/docs/setup-auto-archiving/
If you cannot use the console because it requires CLI
try 'php archive.php --url=http://your.piwik/path'
-------------------------------------------------------
\n\n";
}
$archiving = new Piwik\CronArchive();
try {
$archiving->main();
} catch (Piwik\CronArchiveFatalException $ex) {
$ex->logAndExit($archiving);
} catch (Exception $e) {
$archiving->logFatalExceptionAndExit($e);
}
if (Piwik\Common::isPhpCliMode()) {
require_once PIWIK_INCLUDE_PATH . "/core/bootstrap.php";
$console = new Piwik\Console();
// manipulate command line arguments so CoreArchiver command will be executed
$script = array_shift($_SERVER['argv']);
array_unshift($_SERVER['argv'], 'core:archive');
array_unshift($_SERVER['argv'], $script);
$console->run();
} else { // if running via web request, use CoreAdminHome.runCronArchiving method
Piwik\Common::sendHeader('Content-type: text/plain');
$_GET['module'] = 'API';
$_GET['method'] = 'CoreAdminHome.runCronArchiving';
$_GET['format'] = 'console'; // will use Content-type text/plain
if(!isset($_GET['token_auth'])) {
echo "
<b>You must specify the Super User token_auth as a parameter to this script, eg. <code>?token_auth=XYZ</code> if you wish to run this script through the browser. </b><br>
However it is recommended to run it <a href='http://piwik.org/docs/setup-auto-archiving/'>via cron in the command line</a>, since it can take a long time to run.<br/>
In a shell, execute for example the following to trigger archiving on the local Piwik server:<br/>
<code>$ /path/to/php /path/to/piwik/console core:archive --url=http://your-website.org/path/to/piwik/</code>
\n\n";
exit;
}
require_once PIWIK_INCLUDE_PATH . "/index.php";
}

View file

@ -1,52 +1,19 @@
#!/bin/sh -e
# =======================================================================
# BEFORE YOU USE THIS SCRIPT:
# PLEASE DON'T.
# =======================================================================
# WARNING: this script archive.sh is DEPRECATED!
#
#
# ==> Use archive.php instead. <==
# => Replace your cron with `/usr/bin/php5 /path/to/piwik/console core:archive --url=http://example.org/piwik/`
#
# See documentation at http://piwik.org/setup-auto-archiving/
# =======================================================================
# Description
# This cron script will automatically run Piwik archiving every hour.
# The script will also run scheduled tasks configured within piwik using
# the event hook 'TaskScheduler.getScheduledTasks'
# It automatically fetches the Super User token_auth
# and triggers the archiving for all websites for all periods.
# This ensures that all reports are pre-computed and Piwik renders very fast.
# Documentation
# Please check the documentation on http://piwik.org/docs/setup-auto-archiving/
# How to setup the crontab job?
# Add the following lines in your crontab file, eg. /etc/cron.d/piwik-archive
#---------------START CRON TAB--
#MAILTO="youremail@example.com"
#5 * * * * www-data /path/to/piwik/misc/cron/archive.sh > /dev/null
#-----------------END CRON TAB--
# When an error occurs (eg. php memory error, timeout) the error messages
# will be sent to youremail@example.com.
#
# Optimization for high traffic websites
# You may want to override the following settings in config/config.ini.php:
# See documentation of the fields in your piwik/config/config.ini.php
#
# [General]
# time_before_archive_considered_outdated = 3600
# enable_browser_archiving_triggering = false
#===========================================================================
for TEST_PHP_BIN in php5 php php-cli php-cgi; do
if which $TEST_PHP_BIN >/dev/null 2>/dev/null; then
PHP_BIN=`which $TEST_PHP_BIN`
break
fi
done
if test -z $PHP_BIN; then
echo "php binary not found. Make sure php5 or php exists in PATH." >&2
exit 1
@ -61,39 +28,16 @@ act_path() {
ARCHIVE=`act_path ${0}`
PIWIK_CRON_FOLDER=`dirname ${ARCHIVE}`
PIWIK_PATH="$PIWIK_CRON_FOLDER"/../../index.php
PIWIK_TOKEN_GENERATOR="$PIWIK_CRON_FOLDER"/../../misc/cron/updatetoken.php
PIWIK_PATH="$PIWIK_CRON_FOLDER"/../../console
FILENAME_TOKEN_CONTENT=`$PHP_BIN $PIWIK_TOKEN_GENERATOR`
TOKEN_AUTH=`cat $FILENAME_TOKEN_CONTENT | cut -f2`
CONSOLE_CMD="$PHP_BIN -q $PIWIK_PATH core:archive --url=http://example.org"
CMD_GET_ID_SITES="$PHP_BIN -q $PIWIK_PATH -- module=API&method=SitesManager.getAllSitesId&token_auth=$TOKEN_AUTH&format=csv&convertToUnicode=0"
ID_SITES=`$CMD_GET_ID_SITES`
MESSAGE="\n\n WARNING: this script archive.sh is DEPRECATED! \n\nPlease update your cron as explained in the documentation: http://piwik.org/docs/setup-auto-archiving/ \n\n"
echo "Starting Piwik reports archiving..."
echo ""
for idsite in $ID_SITES; do
TEST_IS_NUMERIC=`echo $idsite | egrep '^[0-9]+$'`
if test -n "$TEST_IS_NUMERIC"; then
for period in day week month year; do
echo ""
echo "Archiving period = $period for idsite = $idsite..."
CMD="$PHP_BIN -q $PIWIK_PATH -- module=API&method=VisitsSummary.getVisits&idSite=$idsite&period=$period&date=last52&format=xml&token_auth=$TOKEN_AUTH"
$CMD
done
echo $MESSAGE;
echo ""
echo "Archiving for idsite = $idsite done!"
fi
done
$CONSOLE_CMD
echo "Reports archiving finished."
echo "---------------------------"
echo "Starting Scheduled tasks..."
echo ""
CMD="$PHP_BIN -q $PIWIK_PATH -- module=API&method=CoreAdminHome.runScheduledTasks&format=csv&convertToUnicode=0&token_auth=$TOKEN_AUTH"
$CMD
echo ""
echo "Finished Scheduled tasks."
echo ""
echo $MESSAGE;
exit 1

View file

@ -1,6 +1,6 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik - free/libre analytics platform
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@ -11,6 +11,10 @@
namespace Piwik;
use Piwik\Application\Environment;
use Piwik\Tests\Framework\TestingEnvironmentManipulator;
use Piwik\Tests\Framework\TestingEnvironmentVariables;
if (!defined('PIWIK_INCLUDE_PATH')) {
define('PIWIK_INCLUDE_PATH', realpath(dirname(__FILE__) . "/../.."));
}
@ -31,9 +35,28 @@ if (!Common::isPhpCliMode()) {
$testmode = in_array('--testmode', $_SERVER['argv']);
if ($testmode) {
require_once PIWIK_INCLUDE_PATH . "/tests/PHPUnit/TestingEnvironment.php";
define('PIWIK_TEST_MODE', true);
\Piwik_TestingEnvironment::addHooks();
Environment::setGlobalEnvironmentManipulator(new TestingEnvironmentManipulator(new TestingEnvironmentVariables()));
}
/**
 * Looks for a `--piwik-domain=<value>` argument in `$_SERVER['argv']`.
 *
 * Only arguments that start with `--piwik-domain=` are taken into account, so the
 * returned value is always exactly the text that follows the option name. The
 * first matching argument wins.
 *
 * @return string|null the text following `--piwik-domain=`, or null if no
 *                     argument starts with that prefix
 */
function getPiwikDomain()
{
    $pattern = '--piwik-domain=';

    foreach ($_SERVER['argv'] as $param) {
        // Position 0 is required: substr() below cuts off exactly the prefix length,
        // which is only correct when the argument begins with the prefix.
        if (0 === strpos($param, $pattern)) {
            return substr($param, strlen($pattern));
        }
    }

    return null;
}
$environment = new Environment('cli');
$environment->init();
$piwikDomain = getPiwikDomain();
if($piwikDomain) {
Url::setHost($piwikDomain);
}
$token = Db::get()->fetchOne("SELECT token_auth
@ -41,7 +64,8 @@ $token = Db::get()->fetchOne("SELECT token_auth
WHERE superuser_access = 1
ORDER BY date_registered ASC");
$filename = PIWIK_INCLUDE_PATH . '/tmp/cache/token.php';
$filename = $environment->getContainer()->get('path.tmp') . '/cache/token.php';
$content = "<?php exit; //\t" . $token;
file_put_contents($filename, $content);
echo $filename;

View file

@ -0,0 +1,550 @@
# Technical concept for implementing Content Tracking [#4996](#4996)
See https://github.com/piwik/piwik/issues/4996 for explanation of the actual feature.
This is the technical concept for implementing content tracking. We won't plan anything to death but a little bit of thinking upfront makes sense :) Feel free to contribute and let us know if you have any objections! If your thoughts are not technical please comment on the actual issue [#4996](#4996).
## Naming
| Name | Purpose |
| ------------- | ------------- |
| Plugin name | Contents |
| Content block | Is a container which consists of a content name, piece, target and an interaction. |
| Content name | A name that represents a content block. The name will be visible in reports. One name can belong to different content pieces. |
| Content piece | This is the actual content that was displayed, eg a path to a video/image/audio file, a text, ... |
| Content target | For instance the URL of a landing page where the user was led to after interacting with the content block. |
| Content impression | Any content block that was displayed on a page, such as a banner or an ad. Optionally you can tell Piwik to track only impressions for content blocks that were actually visible. |
| Content interaction | Any content block that was interacted with by a user. This means usually a 'click' on a banner or ad happened, but it can be any interaction. |
| Content interaction rate | The ratio of content interactions to content impressions. For instance, if an ad was displayed 100 times and there were 2 interactions, the resulting rate is 2%. |
## Tracking the content declarative
Generally said you can usually choose between HTML attributes and CSS classes to define the content you want to track. Attributes always take precedence over CSS classes. So if you define an attribute on one element and a CSS class on another element we will always pick the element having the attribute. If you set the same attribute or the same class on multiple elements within one block, the first element will always win.
Nested content blocks are currently not supported.
HTML attributes are the recommended way to go as it allows you to set a specific value that will be used when detecting the content impressions on your website.
Imagine you do not have a value for an HTML attribute provided or if a CSS class is used, we will have to try to detect the content name, piece and target automatically based on a set of rules which are explained further below. For instance we are trying to read the content target from a `href` attribute of a link, the content piece from a `src` attribute of an image, and the name from a `title` attribute.
If you let us automatically detect those values it can influence your tracking over time. For instance if you provide the same page in different languages, and we will detect the content automatically, we might end up in many different content blocks that represent actually all the same. Therefore it is recommended to use the HTML-attributes including values.
The following attributes and their corresponding CSS classes are used which will be explained in detail below:
* `[data-track-content] or .piwikTrackContent` == Defines a content block
* `[data-content-name=""]` == Defines the name of the content block
* `[data-content-piece=""] or .piwikContentPiece` == Defines the content piece
* `[data-content-target=""] or .piwikContentTarget` == Defines the content target
* `[data-content-ignoreinteraction] or .piwikContentIgnoreInteraction` == Tells Piwik to not automatically track the interaction
### How to define a block of content?
You can use either the attribute `data-track-content` or the CSS class `piwikTrackContent`. The attribute does not require any value.
Examples:
```
<img src="img-en.jpg" data-track-content/>
// content name = absolutePath(img-en.jpg)
// content piece = absoluteUrl(img-en.jpg)
// content target = ""
<img src="img-en.jpg" class="piwikTrackContent"/>
// content name = absolutePath(img-en.jpg)
// content piece = absoluteUrl(img-en.jpg)
// content target = ""
```
As you can see in these examples we do detect the content piece and name automatically based on the `src` attribute of the image. The content target cannot be detected since an image does not define a link.
Note: In the future we may allow to define the name of the content using this attribute instead of `data-content-name` but I did not want this for two reasons: It could also define the actual content (the content piece) so it would not be intuitive, using `data-content-name` attribute allows to set the name also on nested attributes.
### How do we detect the content piece element?
The content piece element is used to detect the actual content of a content block.
To find the content piece element we will try to find an element having the attribute `data-content-piece` or the CSS class `piwikContentPiece`. This attribute/class can be specified anywhere within a content block.
If we do not find any specific content piece element, we will use the content block element.
### How do we detect the content piece?
* The simplest scenario is to provide an HTML attribute `data-content-piece="foo"` including a value anywhere within the content block or in the content block element itself.
* If there is no such attribute we will check whether the content piece element is a media (audio, video, image) and we will try to detect the URL to the media automatically. For instance using the `src` attribute. If a found media URL does not include a domain or is not an absolute URL we will make sure to have a fully qualified URL.
* In case of video and audio elements, when there are multiple sources defined, we will choose the URL of the first source
* If we haven't found anything we will fall back to use the value "Unknown". In such a case you should set the attribute `data-content-piece` telling us explicitly what the content is.
Examples:
```
<a href="http://www.example.com" data-track-content><img src="img-en.jpg" data-content-piece="img.jpg"/></a>
// content name = img.jpg
// content piece = img.jpg
// content target = http://www.example.com
```
As you can see we can now define a specific value for the content piece which can be useful if your text or images are different for each language.
This time we can also automatically detect the content target since we have set the content block on an `a` element. More about this later. The `data-content-piece` attribute can be set on any element, also in the `a` element.
```
<a href="http://www.example.com" data-track-content><img src="img-en.jpg" data-content-piece/></a>
<a href="http://www.example.com" data-track-content><img src="img-en.jpg" class="piwikContentPiece"/></a>
// content name = absolutePath(img-en.jpg)
// content piece = absoluteUrl(img-en.jpg)
// content target = http://www.example.com
```
In this example we were able to detect the name and the piece of the content automatically based on the `src` attribute.
```
<a href="http://www.example.com" data-track-content><p data-content-piece>Lorem ipsum dolor sit amet</p></a>
<a href="http://www.example.com" data-track-content><p class="piwikContentPiece">Lorem ipsum dolor sit amet</p></a>
// content name = Unknown
// content piece = Unknown
// content target = http://www.example.com
```
As the content piece element is not an image, video or audio we cannot detect the content automatically. In such a case you have to define the `data-content-piece` attribute and set a value to it. We do not use the text of this element by default since the text might change often resulting in many content pieces, since it can be very long, since it can be translated and therefore results in many different content pieces although it is always the same, since it might contain user specific content and so on.
Better:
```
<a href="http://www.example.com" data-track-content><p data-content-piece="My content">Lorem ipsum dolor sit amet...</p></a>
// content name = My content
// content piece = My content
// content target = http://www.example.com
```
### How do we detect the content name?
The content name represents a content block which will help you in the Piwik UI to easily identify a specific block.
* The simplest scenario is that you provide us an HTML attribute `data-content-name` with a value anywhere within a content block or in a content block element itself.
* If there is no such element we will use the value of the content piece in case there is one (if !== Unknown).
* A content piece will be usually detected automatically in case the content piece is an image, video or audio element.
* If content piece is a URL that is identical to the current domain of the website we will remove the domain from the URL
* If we do not find a name we will look for a `title` attribute in the content block element.
* If we do not find a name we will look for a `title` attribute in the content piece element.
* If we do not find a name we will look for a `title` attribute in the content target element.
* If we do not find a name we will fall back to "Unknown"
Examples:
```
<img src="img-en.jpg" data-track-content data-content-name="Image1"/>
// content name = Image1
// content piece = absoluteUrl(img-en.jpg)
// content target = ""
```
This example would be the way to go by defining a `data-content-name` attribute anywhere we can easily detect the name of the content.
```
<img src="img-en.jpg" data-track-content/>
// content name = absolutePath(img-en.jpg)
// content piece = absoluteUrl(img-en.jpg)
// content target = ""
```
If no content name is set, it will default to the content piece in case there is one.
```
<img src="http://www.example.com/path/img-en.jpg" data-track-content/>
// content name = /path/img-en.jpg
// content piece = http://www.example.com/path/img-en.jpg
// content target = ""
```
If content piece contains a domain that is the same as the current website's domain we will remove it
```
<a href="http://www.example.com" data-track-content>Lorem ipsum dolor sit amet...</a>
// content name = Unknown
// content piece = Unknown
// content target = http://www.example.com
```
In case there is no content name, no content piece and no title set anywhere it will default to "Unknown". To get a useful content name you should set either the `data-content-name` or a `title` attribute.
```
<a href="http://www.example.com" data-track-content title="Block Title"><span title="Inner Title" data-content-piece>Lorem ipsum dolor sit amet...</span></a>
// content name = Block Title
// content piece = Unknown
// content target = http://www.example.com
```
In case there is no content name and no content piece we will fall back to the `title` attribute of the content block. The `title` attribute of the block element takes precedence over the piece element in this example.
### How do we detect the content target element?
The content target is the element that we will use to detect the URL of the landing page of the content block. The target element is usually a link or a button element. Generally said the target doesn't have to be a URL it can be anything but in most cases it will be a URL. A target could be for instance also a tab-container
We detect the target element either by the attribute `data-content-target` or by the class `.piwikContentTarget`. If no such element can be found we will fall back to the content block element.
### How do we detect the content target URL?
* The simplest scenario is that you provide us an HTML attribute `data-content-target` with a value anywhere within a content block or in a content block element itself.
* If there is no such element we will look for an `href` attribute in the target element
* If there is no such attribute we will use an empty string ""
Examples:
```
<a href="http://www.example.com" data-track-content>Click me</a>
// content name = Unknown
// content piece = Unknown
// content target = "http://www.example.com"
```
As no specific target element is set, we will read the `href` attribute of the content block.
```
<a onclick="location.href='http://www.example.com'" data-content-target="http://www.example.com" data-track-content>Click me</a>
// content name = Unknown
// content piece = Unknown
// content target = "http://www.example.com"
```
No `href` attribute is used as the link is executed via javascript. Therefore a `data-content-target` attribute with value has to be specified.
```
<div data-track-content><input type="submit"/></div>
// content name = Unknown
// content piece = Unknown
// content target = ""
```
As there is neither a `data-content-target` attribute nor a `href` attribute we cannot detect the target.
```
<div data-track-content><input type="submit" data-content-target="http://www.example.com"/></div>
// content name = Unknown
// content piece = Unknown
// content target = "http://www.example.com"
```
As the `data-content-target` attribute is specifically set with a value, we can detect the target URL based on this. Otherwise we could not.
```
<div data-track-content><a href="http://www.example.com" data-content-target>Click me</a></div>
<div data-track-content><a href="http://www.example.com" class="piwikContentTarget">Click me</a></div>
// content name = Unknown
// content piece = Unknown
// content target = "http://www.example.com"
```
As the target element has a `href` attribute we can detect the content target automatically.
### How do we track an interaction automatically?
Interactions can be detected declaratively in case the detected target element is an `a` or `area` element with an `href` attribute. If not, you will have to track
the interaction programmatically, see one of the next sections. We generally treat links to the same page differently than downloads or outlinks.
We use `click` events to detect an interaction with a content block. On mobile devices you might want to listen to `touch` events. In this case you may have to disable automatic content interaction tracking, see below.
#### Links to the same domain
In case we detect a link to the same website we will replace the current `href` attribute with a link to the `piwik.php` tracker URL. Whenever a user clicks on such a link we will first send the user to the `piwik.php` of your Piwik installation and then redirect the user from there to the actual page. This click will be tracked as an event. Where the event category is the string `Content`, the event action is the value of the content interaction such as `click` and the event name will be the same as the content name.
If the URL of the replaced `href` attribute changes meanwhile by your code we will respect the new `href` attribute and make sure to update the link with a `piwik.php` URL. Therefore we will add a `click` listener to the element.
Note: The referrer information will get lost when redirecting from piwik.php to your page. If you depend on this you need to disable automatic tracking of interaction see below
If you have added an `href` attribute after we scanned the DOM for content blocks we can not detect this and an interaction won't be tracked.
#### Outlinks and downloads
Outlinks and downloads are handled as before. If a user clicks on a download or outlink we will track this action using an XHR. Along with the information of this action we will send the information related to the content block. We will not track an additional event for this.
#### Anchor links
Anchor links will be tracked using an XHR.
### How to prevent the automatic tracking of an interaction?
Maybe you do not want us to track any interaction automatically as explained before.
To do so you can either set the attribute `data-content-ignoreinteraction` or the CSS class `piwikContentIgnoreInteraction` on the content target element.
Examples
```
<a href="http://outlink.example.com" class="piwikTrackContent piwikContentIgnoreInteraction">Add to shopping cart</a>
<a href="http://outlink.example.com" data-track-content data-content-ignoreinteraction>Add to shopping cart</a>
<div data-track-content><a href="http://outlink.example.com" data-content-target data-content-ignoreinteraction>Add to shopping cart</a></div>
```
In all examples we would track the impression automatically but not the interaction.
Note: In single page application you will most likely always have to disable automatic tracking of an interaction as otherwise a page reload and a redirect will happen.
### Putting it all together
A few Examples:
```
<div data-track-content data-content-name="My Ad">
<img src="http://www.example.com/path/xyz.jpg" data-content-piece />
<a href="/anylink" data-content-target>Add to shopping cart</a>
</div>
// content name = My Ad
// content piece = http://www.example.com/path/xyz.jpg
// content target = /anylink
```
A typical example for a content block that displays an image - which is the content piece - and a call to action link - which is the content target - below.
We would replace the `href=/anylink` with a link to piwik.php of your Piwik installation which will in turn redirect the user to the actual target to actually track the interaction.
```
<a href="http://ad.example.com" data-track-content>
<img src="http://www.example.com/path/xyz.jpg" data-content-piece />
</a>
// content name = /path/xyz.jpg
// content piece = http://www.example.com/path/xyz.jpg
// content target = http://ad.example.com
```
A typical example for a content block that displays a banner ad.
```
<a href="http://ad.example.com" data-track-content data-content-name="My Ad">
Lorem ipsum....
</a>
// content name = My Ad
// content piece = Unknown
// content target = http://ad.example.com
```
A typical example for a content block that displays a text ad.
## Tracking the content programmatically
There are several ways to track a content impression and/or interaction manually, semi-automatically and automatically. Please be aware that content impressions will be tracked using bulk tracking which will always send a `POST` request, even if `GET` is configured which is the default.
Note: In case you have link tracking enabled you should call `enableLinkTracking()` before any of those functions.
#### `trackAllContentImpressions()`
You can use this method to scan the entire DOM for content blocks.
For each content block we will track a content impression immediately. If you only want to track visible content impression have a look at `trackVisibleContentImpressions()`.
Note: We will not send an impression of the same content block twice if you call this method multiple times unless `trackPageView()` is called meanwhile. This is useful for single page applications. The "same" content blocks means if a content block has the identical name, piece and target as an already tracked one.
Note: At this stage we do not execute this method automatically along with a trackPageView(), we can do this later once we know it works.
#### `trackVisibleContentImpressions(checkOnScroll, timeIntervalInMs)`
If you enable to track only visible content we will only track an impression if a content block is actually visible. With visible we mean the content block has been in the view port, it is actually in the DOM and is not hidden via CSS (opacity, visibility, display, ...).
* Optionally you can tell us to rescan the DOM automatically after each scroll event by passing `checkOnScroll=true`. We will then check whether the previously hidden content blocks are visible now and if so track the impression.
* Parameter defaults to boolean `true` if not specified.
* As the scroll event is triggered after each pixel scrolling would be very slow when checking for new visible content blocks each time the event is triggered. Instead we are checking every 100ms whether a scroll event was triggered and if so we scan the DOM for new visible content blocks
* Note: If a content block is placed within a scrollable element (`overflow: scroll`), we do currently not attach an event in case the user scrolls within this element. This means we would not detect that such an element becomes visible.
* Optionally you can tell us to rescan the entire DOM for new impressions every X milliseconds by passing `timeIntervalInMs=500` (rescan DOM every 500ms).
* If the parameter is not set, a default interval of 750ms will be used.
* Rescanning the entire DOM and detecting the visible state of content blocks can take a while depending on the browser and amount of content
* We do not really rescan every X milliseconds. We will schedule the next rescan after a previous scan has finished. So if it takes 20ms to scan the DOM and you tell us to rescan every 50ms it can actually take 70ms.
* In case your frames per second goes down you might want to increase this value
* If you do want to only track visible content but not want us to perform any checks automatically you can either call `trackVisibleContentImpressions()` manually at any time to rescan the entire DOM or `trackContentImpressionsWithinNode()` to check only a specific part of the DOM for visible content blocks.
* Call `trackVisibleContentImpressions(false, 0)` to initially track only visible content impressions
* Call `trackVisibleContentImpressions()` at any time again to rescan the entire DOM for newly visible content blocks or
* Call `trackContentImpressionsWithinNode(node)` at any time to rescan only a part of the DOM for newly visible content blocks
Note: You can not change the `checkOnScroll` or `timeIntervalInMs` after this method was called the first time.
#### `(checkOnScroll, timeIntervalInMs)`
Is a shorthand for calling `enableTrackOnlyVisibleContent()` and `trackContentImpressions()`.
#### `trackContentImpressionsWithinNode(domNode, contentTarget)`
You can use this method if you, for instance, dynamically add an element using JavaScript to your DOM after we have tracked the initial impressions. Calling this method will make sure an impression will be tracked for all content blocks contained within this node.
Example
```
var div = $('<div>...<div data-track-content>...</div>...<div data-track-content>...</div></div>');
$('#id').append(div);
_paq.push(['trackContentImpressionsWithinNode', div[0]]);
```
We would detect two new content blocks in this example.
Please note: In case you have enabled to only track visible content blocks we will respect this. In case it contains a content block that was already tracked we will not track it again.
#### trackContentInteractionNode(domNode, contentInteraction)
By default we track interactions depending on a click and sometimes we cannot track interactions automatically at all. See "How do we track an interaction automatically?". In case you want to track an interaction manually, for instance on a double click or on a form submit, you can do this as follows:
Example
```
anyElement.addEventListener('dblclick', function () {
_paq.push(['trackContentInteractionNode', this]);
});
form.addEventListener('submit', function () {
_paq.push(['trackContentInteractionNode', this, 'submittedForm']);
});
```
* The passed `domNode` can be any node within a content block or the content block element itself. Nothing will be tracked in case there is no content-block found.
* The content name and piece will be detected based on the content block
* Optionally you can set the name of the content interaction. If none is provided, `Unknown` will be used. Could be for instance `click` or `submit`.
* The interaction will actually only have any effect if an impression was tracked for this content-block
#### `trackContentImpression(contentName, contentPiece, contentTarget)` and `trackContentInteraction(contentName, contentPiece, contentInteraction)`
You should use those methods only in conjunction with each other. It is not recommended to use `trackContentInteraction()` after an impression was tracked automatically using one of the other methods, as an interaction would only count if you set the same content name and piece that was used to track the related impression.
Example
```
_paq.push(['trackContentImpression', 'Content Name', 'Content Piece', 'http://www.example.com']);
div.addEventListener('click', function () {
_paq.push(['trackContentInteraction', 'Content Name', 'Content Piece', 'tabActivated']);
});
```
Be aware that each call to one of those two methods will send one request to your Piwik tracker instance. Calling those methods too many times can cause performance problems.
## Tracking Content Impressions API
Content impressions are logically not really events and I don't think it makes sense to use them here. It would also make it harder to analyze events when they are mixed with pieces of content.
* To track a content impression you will need to send the URL parameters `c_n`, `c_p` and `c_t` for name, piece and target along a tracking request.
* `c_p` for content piece and `c_t` for content target are optional.
* Multiple content impressions can be sent using bulk tracking for faster performance
## Tracking content interactions API
Contrary to impressions, clicks are actually events and it would be nice to use events here unless it is not an outlink or download to not lose such tracking data.
* To track a content interaction you will need to send at least the URL parameters `c_n`, `c_p` and `c_i` for name, piece and interaction
We will link interactions to impressions at archiver time.
## Database
* New column `idaction_content_url` and `idaction_content_piece` in `log_link_visit_action`. For name `idaction_name` can be reused?
Could we also reuse `idaction_url` instead of adding new column `idaction_content_url`?
And we could also store the URL of the page showing the Content in `idaction_url_ref`. (reusing columns is good in this case)
* Would we need a new column for each piece of content in action table to make archiver work? --> would result in many! columns
* or would we need a new table for each piece of content to make archiver work? --> would be basically a copy of the link_action table and therefore not really makes sense I reckon. Only a lot of work. Logically I am not sure if an impression is actually an "action" so it could make sense
* or would we store the pieces serialized as JSON in a `content` column? I don't know anything about the archiver but I think it wouldn't work at all
* or would we create an action entry for each piece of content? --> yes I think!
Yes it seems most logical to create an action entry for each Content.
## Thoughts on piwik.js
* We need to find all dom nodes having css class or html attribute.
* Options for this is traversing over each node and checking for everything -> CSS selectors cannot be used on all browsers and it might be slow therefore -> maybe lot of work to make it cross browser compatible
* https://github.com/fabiomcosta/micro-selector --> tiny selector library but does not support attributes
* http://sizzlejs.com/ Used by jQuery & co but like 30kb (compressed + gzipped 4kb). Has way too many features we don't need
* https://github.com/ded/qwery Doesn't support IE8 and a few others, no support for attribute selector
* https://github.com/padolsey/satisfy 2.4KB and probably outdated
* https://github.com/digitarald/sly very tiny and many features but last commit 3 years old
* https://github.com/alpha123/Jaguar >10KB and last commit 2 years old
* As we don't need many features we could implement it ourselves but probably needs a lot of cross-browser testing which I wanted to avoid. We'd only start with `querySelectorAll()` maybe. Brings also incredible [performance benefits](http://jsperf.com/jquery-vs-native-selector-and-element-style/2) (2-10 faster than jQuery) but there might be problems see http://stackoverflow.com/questions/11503534/jquery-vs-document-queryselectorall, http://jsfiddle.net/QdMc5/ and http://ejohn.org/blog/thoughts-on-queryselectorall/
## Reports
Nothing special here I think. We would probably automatically detect the type of content (image, video, text, sound, ...) depending on the content eg in case it ends with [.jpg, .png, .gif] it could be recognized as image content and show a banner in the report.
## TODO
* UI tests
## Notes:
* Referrer gets lost when using piwik.php
* Single page applications will always want to disable interactions as redirect would not fit into their concept!!!
* User can decide to manually setup the proper redirect URL via piwik.php?rec=1&idsite=1&clickurl={$URL_HERE}&....
* Currently, the user would also have to add event URL parameters and make sure to send the correct name and piece to match an impression.
* If the user does not use any data-content-* attributes this is very likely to fail since the auto detected content name and piece can easily change and tracking would be broken
* The only advantage I see would be that we even track clicks if we haven't added a click listener to replace the URL yet (for instance before DOM loaded)
* and/or maybe we can replace the href="" directly within the DOM so right click, middle click, shift click are also tracked
* sounds ok to me, have implemented it like this. Only problem is in case a replaced link changes later, for instance based on a visitor form selection.
* To prevent this I added a click event on top of it and in case it does not start with configTrackerUrl I will build it again
* it might be bad for SEO
* FYI: outlinks/downloads will be still tracked as it is done currently for simplicity (500ms) so we are talking here only about internal links that are not anchor links (starting with "#"). Those would not be tracked
* http://outlink.example.org --> not replaced -> handled the old way
* #target --> not replaced -> handled the old way. In single page application users have to call trackWhatever again
* note to myself: They should be able to parse a node that we parse for all content as you maybe wanna parse only the replaced ajax content. maybe v2
* index.php, /foo/bar --> will be directly replaced by piwik.php in case clickNode (element having clickAttribute/Class) is an "A" element
* Need to think about possible XSS. If an attacker can set href attributes on that website and we replace attribute based on that but should be ok ...
* FYI: Piwik Mobile displays currently only one metric, so people won't see impressions and number of interactions or ratio next to each other
* If user wants to track only visible content we'll need to wait until the website's load (not DOMContentLoaded) event is triggered. Otherwise CSS might not be applied yet and we cannot detect whether a node is actually visible. Downside: Some websites might take > 10 seconds until this event is triggered, depending on how well they are developed. During this time the user might be already no longer on that page or might have already scrolled to somewhere else.
* If user wants to track all content impressions (not only the visible ones) we'd probably have to wait until at least DOMContentLoaded event is triggered
* If the load event takes like 10 seconds later, the user has maybe already scrolled and seen some content blocks but we cannot detect... so considering viewport we need to assume all above the deepest scrollpoint was seen
## Answered Questions
1. Can the same content piece have different names / targets? Can the same content name have different targets/pieces?
Maybe the unique ID of a Content can be the { Content name + Content piece }. Then we would recommend users to set the same Content target for a given tuple { Content name, Content piece }.
I hope it makes sense to assume this tuple will have always same Content target by design?
In this case I would modify the questions as follows:
* Can the same content piece have different names? Yes (eg. a banner image is used by different Content names),
* Can the same { content name, content piece } have different targets? Yes, but it's not recommended: Piwik will only aggregate one content target value. (eg. keep the latest content target value tracked for this { content name, content piece } tuple on a given day)
2. Are we always assuming the "conversion" or "target URL" is caused by a click or can it be a hover or drag/drop, ...? For a general solution we might want to assume it can be anything?
* In this case we would also rename or need an additional attribute or whatever to [data-trackclick] etc.
When drag and dropping there is a click needed by user, so maybe `data-trackclick` would still be OK in this case?
if you have better naming idea feel free to suggest. Or maybe you have other use cases besides clicks and drag n drop?
3. Would a piece of content - such as a banner - have maybe custom variables etc?
It would be nice to be able to set custom variables to Contents.
One possible use case is A/B testing. Maybe it would make sense to use Contents plugin for A/B testing. We could measure Content name = Experiment_TopMenu, Content piece = http://host/a.jpg. In a custom variable we would store "experiment => B". Then we would know that the given experiment is called Experiment_TopMenu and is defined by the image and that it's the variant B being served.
4. How do we present the data in a report? Similar to events with second dimensions? Probably depends on 1)
Second dimension would be really powerful to have (as per suggestion in 1)). It would let user see different banner images for a given banner name.
There would be two reports:
* First dimension: Banner Names, Second dimension: Banner pieces
* First dimension: Banner pieces, Second dimension: Banner names
(It's a bit simpler than Events because we don't need to switch the second dimension.)
5. I assume there can be nested content in theory. A piece of content that contains another piece of content. In this case we have to be careful when automatically picking name, target, ...
Nested content makes sense (users will do this). How would it work when several contents are nested?
Note: we don't need to handle this case in MVP but maybe worth thinking about it.
6. FYI: We would probably also need an attribute like data-target="$target" and/or the possibility for data-trackclick="$target" since not all links might be defined via href but onclick javascript links. See next section
+1
7. HTML Attributes always take precedence over css classes or the other way around (if both defined)? I think attributes should take precedence which I think is also defined in the spec
attributes take precedence over CSS classes
8. Do we need to support IE7 and older? Firefox 3 and older?
Support modern browsers is enough (ie. last 2 years or so?).
9. "Maybe we could automatically detect when such element becomes visible, and send the Impression event automatically"
* I think we can detect whether a specific content was visible at a specific time in most cases but not necessarily automatically. We would have to check the DOM for this every few ms (in case of Carousel) and we'd also have to attach to events like scrolling etc. This can make other people's websites slow, especially on mobile but even browser. Website owners usually want to achieve 60fps to have animations and scrolling smooth and they usually invest a lot of time to achieve this. So it has to be an opt-in if at all
in case user tags an element with `data-noautotrack` then it's already a kind of opt-in by user, so maybe in this case it's acceptable to check whether element tagged is visible, eg. every 500 ms ?
* Do I understand it right that we send an impression only if it is visible?
Yes.
* We'd probably have to offer a mode to send all banners independent of visibility
Sounds good: this would make Contents plugin more generic.
* We'd probably have to offer a mode to rescan all banners again at a certain time and only track those content pieces now that were not visible before but are now
In ticket I wrote `function trackContentPieces() that will let users re-scan the page for Content pieces when DOM has changed.` but maybe instead the function should be called `rescanPageForContents` ?
* We'd probably have to offer a method to pass a DOM node and track it independent of visibility (useful for instance in case of carousel when the website owner already knows a specific content piece is visible now but does not want to use expensive events for this)
if I understand correctly it would make life of JS developers easier by providing nicer APIs to them?
so +1
* We'd maybe have to offer a mode where we are trying to detect automatically when an impression becomes visible and send it
I think that should be the default mode, ie. on page load we detect impressions, and then we also attach to events like scrolling to check ie. every 500ms whether a given Contents is visible. Would that work?
10. FYI: "you may add a CSS class or attribute to the link element to track" => It could be also a span, a div or something else
11. FYI: There is way too much magic in how content-name is found and it is neither predictable nor understandable by users. I will simplify this and rather require users to set specific attributes! See next section
OK
12. FYI: We need to define how a content piece is defined in markup since it can be anything (was something like piwik-banner before) see next section
13. Why do we track an event for an interaction? With the current implementation this is done only on a click to an internal URL anyway... does it actually make sense? I mean there will be pageview -> content + event action -> same pageview after redirect. We would track the same information 3 times
It makes actually no sense and I will remove it again. It makes no sense because:
* We would currently only track links to the same website as an event (as only there piwik.php is used), we could use it for other links as well but why...
* A click to an internal page of the same website is simply no event per se. Also to an outlink or download... it is not an event
* As it is possible that we would add many different EventNames (= ContentNames) and EventActions (=ContentInteraction) it would maybe make it harder for some users to analyze their event names/actions that they use for other things
* The tracked content will be already displayed in the content report anyway, why displaying the same data in 2 reports (events and contents or actually even 3 reports as a pageview will be later tracked as well). There is no value in it
* ...

View file

@ -0,0 +1,8 @@
# How to contribute
Great to have you here!
## How to submit a bug report or suggest a feature?
Please read the recommendations on writing a good [bug report](http://developer.piwik.org/guides/core-team-workflow#submitting-a-bug-report) or [feature request](http://developer.piwik.org/guides/core-team-workflow#submitting-a-feature-request).

View file

@ -1,9 +1,50 @@
# Piwik Server Log Analytics: Import your server logs in Piwik!
# Piwik Server Log Analytics
Import your server logs in Piwik with this powerful and easy to use tool.
## Requirements
* Python 2.6 or 2.7. Python 3.x is not supported.
* Update to Piwik 1.11
* Piwik >= 2.14.0
Build status (master branch) [![Build Status](https://travis-ci.org/piwik/piwik-log-analytics.svg?branch=master)](https://travis-ci.org/piwik/piwik-log-analytics)
## Supported log formats
The script will import all standard web server log files, and some files with non-standard formats. The following log formats are supported:
* all default log formats for: Nginx, Apache, IIS
* all log formats commonly used such as: NCSA Common log format, Extended log format, W3C Extended log files, Nginx JSON
* log files of some popular Cloud SaaS services: Amazon CloudFront logs, Amazon S3 logs
* streaming media server log files such as: Icecast
* log files with and without the virtual host will be imported
In general, many fields are left optional to make the log importer very flexible.
## Get involved
We're looking for contributors! Feel free to submit Pull requests on Github.
### Submit a new log format
The Log Analytics importer is designed to detect and import into Piwik as many log files as possible. Help us add your log formats!
* Implement your new log format in the import_logs.py file (look for `FORMATS = {` variable where the log formats are defined),
* Add a new test in [tests/tests.py](https://github.com/piwik/piwik-log-analytics/blob/master/tests/tests.py),
* Test that the logs are imported successfully as you expected,
* Open a Pull Request,
* Check the test you have added works (the build should be green),
* One Piwik team member will review and merge the Pull Request as soon as possible.
We look forward to your contributions!
### Improve this guide
This readme page could be improved and maybe you would like to help? Feel free to edit this page and create a pull request.
### Implement new features or fixes
If you're a Python developer and would like to contribute to the open source log importer, check out the [list of issues for import_logs.py](https://github.com/piwik/piwik-log-analytics/issues) which lists all issues and suggestions.
## How to use this script?
@ -19,7 +60,13 @@ and will not track bots, static files, or error requests.
If you wish to track all requests the following command would be used:
python /path/to/piwik/misc/log-analytics/import_logs.py --url=http://mysite/piwik/ access.log --idsite=1234 --recorders=4 --enable-http-errors --enable-http-redirects --enable-static --enable-bots
python /path/to/piwik/misc/log-analytics/import_logs.py --url=http://mysite/piwik/ --idsite=1234 --recorders=4 --enable-http-errors --enable-http-redirects --enable-static --enable-bots access.log
### Format Specific Details
* If you are importing Netscaler log files, make sure to specify the **--iis-time-taken-secs** option. Netscaler stores
the time-taken field in seconds while most other formats use milliseconds. Using this option will ensure that the
log importer interprets the field correctly.
## How to import your logs automatically every day?
@ -39,6 +86,31 @@ You can then import your logs automatically each day (at 0:01). Setup a cron job
0 1 * * * /path/to/piwik/misc/log-analytics/import_logs.py -u piwik.example.com `date --date=yesterday +/var/log/apache/access-\%Y-\%m-\%d.log`
## Using Basic access authentication
If you protect your site with Basic access authentication then you can pass the credentials via your
cron job.
Apache configuration:
```
<Location /piwik>
AuthType basic
AuthName "Site requires authentication"
# Where all the external login/passwords are
AuthUserFile /etc/apache2/somefile
Require valid-user
</Location>
```
cron job:
```
5 0 * * * /var/www/html/piwik/misc/log-analytics/import_logs.py --url https://www.mysite.com/piwik --auth-user=someuser --auth-password=somepassword --exclude-path=/piwik/index.php --enable-http-errors --enable-reverse-dns --idsite=1 `date --date=yesterday +/var/log/apache2/access-ssl-\%Y-\%m-\%d.log` > /opt/scripts/import-logs.log
```
Security tips:
* Currently the credentials are not encrypted in the cron job. This should be a future enhancement.
* Always use HTTPS with Basic access authentication to ensure you are not passing credentials in clear text.
## Performance
With an Intel Core i5-2400 @ 3.10GHz (2 cores, 4 virtual cores with Hyper-threading),
@ -58,15 +130,116 @@ To improve performance,
you can disable server access logging for these requests.
Each Piwik webserver (Apache, Nginx, IIS) can also be tweaked a bit to handle more req/sec.
## Advanced uses
## Setup Apache CustomLog that directly imports in Piwik
### Example Nginx Virtual Host Log Format
This log format can be specified for nginx access logs to capture multiple virtual hosts:
* log_format vhosts '$host $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';
* access_log /PATH/TO/access.log vhosts;
When executing import_logs.py specify the "common_complete" format.
### How do I import Page Speed Metric from logs?
In Piwik> Actions> Page URLs and Page Title reports, Piwik reports the Avg. generation time, as an indicator of your website speed.
This metric works by default when using the Javascript tracker, but you can use it with log file as well.
Apache can log the generation time in microseconds using %D in the LogFormat.
This metric can be imported using a custom log format in this script.
In the command line, add the --log-format-regex parameter that contains the group generation_time_micro.
Here's an example:
Apache LogFormat "%h %l %u %t \"%r\" %>s %b %D"
--log-format-regex="(?P<ip>\S+) \S+ \S+ \[(?P<date>.*?) (?P<timezone>.*?)\] \"\S+ (?P<path>.*?) \S+\" (?P<status>\S+) (?P<length>\S+) (?P<generation_time_micro>\S+)"
Note: the group <generation_time_milli> is also available if your server logs generation time in milliseconds rather than microseconds.
### How do I set up Nginx to import directly into Piwik via syslog?
With the syslog patch from http://wiki.nginx.org/3rdPartyModules which is compiled in dotdeb's release, you can log to syslog and import the logs live into Piwik.
Path: Nginx -> syslog -> (syslog central server) -> this script -> piwik
You can use any log format that this script can handle, like Apache Combined, and Json format which needs less processing.
##### Setup Nginx logs
```
http {
...
log_format piwik '{"ip": "$remote_addr",'
'"host": "$host",'
'"path": "$request_uri",'
'"status": "$status",'
'"referrer": "$http_referer",'
'"user_agent": "$http_user_agent",'
'"length": $bytes_sent,'
'"generation_time_milli": $request_time,'
'"date": "$time_iso8601"}';
...
server {
...
access_log syslog:info piwik;
...
}
}
```
##### Setup syslog-ng
This is the config for the central server if any. If not, you can also use this config on the same server as Nginx.
```
options {
stats_freq(600); stats_level(1);
log_fifo_size(1280000);
log_msg_size(8192);
};
source s_nginx { udp(); };
destination d_piwik {
program("/usr/local/piwik/piwik.sh" template("$MSG\n"));
};
log { source(s_nginx); filter(f_info); destination(d_piwik); };
```
##### piwik.sh
Just needed to configure the best params for import_logs.py :
```
#!/bin/sh
exec python /path/to/misc/log-analytics/import_logs.py \
--url=http://localhost/ --token-auth=<your_auth_token> \
--idsite=1 --recorders=4 --enable-http-errors --enable-http-redirects --enable-static --enable-bots \
--log-format-name=nginx_json -
```
##### Example of regex for syslog format (centralized logs)
###### log format example
```
Aug 31 23:59:59 tt-srv-name www.tt.com: 1.1.1.1 - - [31/Aug/2014:23:59:59 +0200] "GET /index.php HTTP/1.0" 200 3838 "http://www.tt.com/index.php" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0" 365020 www.tt.com
```
###### Corresponding regex
```
--log-format-regex='.* ((?P<ip>\S+) \S+ \S+ \[(?P<date>.*?) (?P<timezone>.*?)\] "\S+ (?P<path>.*?) \S+" (?P<status>\S+) (?P<length>\S+) "(?P<referrer>.*?)" "(?P<user_agent>.*?)").*'
```
### Setup Apache CustomLog that directly imports in Piwik
Since apache CustomLog directives can send log data to a script, it is possible to import hits into piwik server-side in real-time rather than processing a logfile each day.
This approach has many advantages, including real-time data being available on your piwik site, using real log files instead of relying on client-side JavaScript, and not having a surge of CPU/RAM usage during log processing.
The disadvantage is that if Piwik is unavailable, logging data will be lost. Therefore we recommend to also log into a standard log file. Bear in mind also that apache processes will wait until a request is logged before processing a new request, so if piwik runs slow so does your site: it's therefore important to tune --recorders to the right level.
In the most basic setup, you might have in your main config section:
##### Basic setup
You might have in your main config section:
```
# Set up your log format as a normal extended format, with hostname at the start
@ -89,8 +262,7 @@ Useful options here are:
You can have as many CustomLog statements as you like. However, if you define any CustomLog directives within a <VirtualHost> block, all CustomLogs in the main config will be overridden. Therefore if you require custom logging for particular VirtualHosts, it is recommended to use mod_macro to make configuration more maintainable.
## Advanced Log Analytics use case: Apache vhost, custom logs, automatic website creation
##### Advanced setup: Apache vhost, custom logs, automatic website creation
As a rather extreme example of what you can do, here is an apache config with:
@ -101,7 +273,7 @@ As a rather extreme example of what you can do, here is an apache config with:
NB use of mod_macro to ensure consistency and maintainability
## Apache configuration source code:
Apache configuration source code:
```
# Set up macro with the options
@ -167,93 +339,8 @@ Use piwiklog %v vhost_common main " "
</VirtualHost>
```
## Nginx Virtual Host Log Format
This log format can be specified for nginx access logs to capture multiple virtual hosts:
* log_format vhosts '$host $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';
* access_log /PATH/TO/access.log vhosts;
When executing import_logs.py specify the "common_complete" format.
### And that's all !
## Import Page Speed Metric from logs
In Piwik> Actions> Page URLs and Page Title reports, Piwik reports the Avg. generation time, as an indicator of your website speed.
This metric works by default when using the Javascript tracker, but you can use it with log file as well.
Apache can log the generation time in microseconds using %D in the LogFormat.
This metric can be imported using a custom log format in this script.
In the command line, add the --log-format-regex parameter that contains the group generation_time_micro.
Here's an example:
Apache LogFormat "%h %l %u %t \"%r\" %>s %b %D"
--log-format-regex="(?P<ip>\S+) \S+ \S+ \[(?P<date>.*?) (?P<timezone>.*?)\] \"\S+ (?P<path>.*?) \S+\" (?P<status>\S+) (?P<length>\S+) (?P<generation_time_micro>\S+)"
Note: the group <generation_time_milli> is also available if your server logs generation time in milliseconds rather than microseconds.
## Setup Nginx to directly imports in Piwik via syslog
With the syslog patch from http://wiki.nginx.org/3rdPartyModules which is compiled in dotdeb's release, you can log to syslog and imports them live to Piwik.
Path: Nginx -> syslog -> (syslog central server) -> this script -> piwik
You can use any log format that this script can handle, like Apache Combined, and Json format which needs less processing.
### Setup Nginx logs
```
http {
...
log_format piwik '{"ip": "$remote_addr",'
'"host": "$host",'
'"path": "$request_uri",'
'"status": "$status",'
'"referrer": "$http_referer",'
'"user_agent": "$http_user_agent",'
'"length": $bytes_sent,'
'"generation_time_milli": $request_time,'
'"date": "$time_iso8601"}';
...
server {
...
access_log syslog:info piwik;
...
}
}
```
# Setup syslog-ng
This is the config for the central server if any. If not, you can also use this config on the same server as Nginx.
```
options {
stats_freq(600); stats_level(1);
log_fifo_size(1280000);
log_msg_size(8192);
};
source s_nginx { udp(); };
destination d_piwik {
program("/usr/local/piwik/piwik.sh" template("$MSG\n"));
};
log { source(s_nginx); filter(f_info); destination(d_piwik); };
```
# piwik.sh
Just needed to configure the best params for import_logs.py :
```
#!/bin/sh
exec python /path/to/misc/log-analytics/import_logs.py \
--url=http://localhost/ --token-auth=<your_auth_token> \
--idsite=1 --recorders=4 --enable-http-errors --enable-http-redirects --enable-static --enable-bots \
--log-format-name=nginx_json -
```
And that's all !
***This documentation is a community effort, feel free to suggest any change via Github Pull request.***

File diff suppressed because it is too large Load diff

View file

@ -5,7 +5,7 @@ require_once '../../libs/PiwikTracker/PiwikTracker.php';
PiwikTracker::$URL = 'http://localhost/trunk/';
$piwikTracker = new PiwikTracker($idSite = 1);
// You can manually set the Visitor details (resolution, time, plugins)
// You can manually set the Visitor details (resolution, time, plugins)
// See all other ->set* functions available in the PiwikTracker class
$piwikTracker->setResolution(1600, 1400);

View file

@ -8,17 +8,18 @@ define('PIWIK_ENABLE_DISPATCH', false);
define('PIWIK_ENABLE_ERROR_HANDLER', false);
define('PIWIK_ENABLE_SESSION_START', false);
// if you prefer not to include 'index.php', you must also define here PIWIK_DOCUMENT_ROOT
// and include "libs/upgradephp/upgrade.php" and "core/Loader.php"
require_once PIWIK_INCLUDE_PATH . "/index.php";
require_once PIWIK_INCLUDE_PATH . "/core/API/Request.php";
$environment = new \Piwik\Application\Environment(null);
$environment->init();
FrontController::getInstance()->init();
// This inits the API Request with the specified parameters
$request = new Request('
module=API
&method=UserSettings.getResolution
&method=Resolution.getResolution
&idSite=7
&date=yesterday
&period=week

View file

@ -1,7 +1,7 @@
<?php
exit; // REMOVE this line to run the script
// this token is used to authenticate your API request.
// this token is used to authenticate your API request.
// You can get the token on the API page inside your Piwik interface
$token_auth = 'anonymous';
@ -20,11 +20,11 @@ if (!$content) {
print("Error, content fetched = " . $fetched);
}
print("<h1>Keywords for the last month</h1>");
print("<h1>Keywords for the last month</h1>\n");
foreach ($content as $row) {
$keyword = htmlspecialchars(html_entity_decode(urldecode($row['label']), ENT_QUOTES), ENT_QUOTES);
$hits = $row['nb_visits'];
print("<b>$keyword</b> ($hits hits)<br>");
print("<b>$keyword</b> ($hits hits)<br>\n");
}

View file

@ -1,39 +0,0 @@
<?php
/**
* Piwik - Open source web analytics
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
use Piwik\Config;
use Piwik\FrontController;
// Bootstrap shared by the command-line helper scripts in this directory:
// resolves the Piwik root, loads the core loader, routes log output to the
// screen and initializes the front controller without dispatching a request.
error_reporting(E_ALL | E_NOTICE);
// The root is two directories above this file. '.' binds tighter than '?:',
// so the '/../..' suffix only applies to the non-'/' branch.
define('PIWIK_DOCUMENT_ROOT', dirname(__FILE__) == '/' ? '' : dirname(__FILE__) . '/../..');
// An optional bootstrap.php in the root is loaded first; it may define
// PIWIK_USER_PATH / PIWIK_INCLUDE_PATH itself, hence the defined() guards below.
if (file_exists(PIWIK_DOCUMENT_ROOT . '/bootstrap.php')) {
require_once PIWIK_DOCUMENT_ROOT . '/bootstrap.php';
}
if (!defined('PIWIK_USER_PATH')) {
define('PIWIK_USER_PATH', PIWIK_DOCUMENT_ROOT);
}
if (!defined('PIWIK_INCLUDE_PATH')) {
define('PIWIK_INCLUDE_PATH', PIWIK_DOCUMENT_ROOT);
}
// Keep running if the client disconnects and remove the execution time limit.
ignore_user_abort(true);
set_time_limit(0);
@date_default_timezone_set('UTC');
require_once PIWIK_INCLUDE_PATH . '/libs/upgradephp/upgrade.php';
require_once PIWIK_INCLUDE_PATH . '/core/testMinimumPhpVersion.php';
require_once PIWIK_INCLUDE_PATH . '/core/Loader.php';
$GLOBALS['PIWIK_TRACKER_DEBUG'] = false;
// Only initialize the front controller; do not dispatch a controller action.
define('PIWIK_ENABLE_DISPATCH', false);
// Add the 'screen' log writer at VERBOSE level, printing the bare message only.
Config::getInstance()->log['log_writers'][] = 'screen';
Config::getInstance()->log['log_level'] = 'VERBOSE';
Config::getInstance()->log['string_message_format'] = "%message%";
FrontController::getInstance()->init();

View file

@ -2,10 +2,8 @@ Count the download for 'latest.zip' on the 20th March
# cat access.log | grep "20/Mar" | grep "latest.zip" | awk '{print $1}' | sort | uniq | wc -l
Value to be compared with the one given by Piwik in Actions > Downloads
Count the number of hits per referrer, excluding piwik.org as a referrer
# cat /var/log/apache2/access.log | awk '{print $11}' | grep -vE "(^"-"$|/dev.piwik.org|/piwik.org)" | sort | uniq -c | sort -rn | head -n20

234
www/analytics/misc/others/geoipUpdateRows.php Executable file → Normal file
View file

@ -1,233 +1,5 @@
<?php
use Piwik\Common;
use Piwik\Config;
use Piwik\Db;
use Piwik\FrontController;
use Piwik\IP;
use Piwik\Log;
use Piwik\Piwik;
use Piwik\Plugins\UserCountry\LocationProvider\GeoIp\Pecl;
use Piwik\Plugins\UserCountry\LocationProvider;
use Piwik\Plugins\UserCountry\LocationProvider\GeoIp\Php;
// Prologue of the geolocation back-fill script: bootstraps Piwik, counts the
// visits to process and decides which row range this invocation handles
// ($start, $end, $limit), depending on whether it runs from CLI or a browser.
require_once './cli-script-bootstrap.php';
ini_set("memory_limit", "512M");
// Total number of rows in log_visit; used as the upper bound of the update range.
$query = "SELECT count(*) FROM " . Common::prefixTable('log_visit');
$count = Db::fetchOne($query);
// when script run via browser, check for Super User & output html page to do conversion via AJAX
if (!Common::isPhpCliMode()) {
try {
Piwik::checkUserHasSuperUserAccess();
} catch (Exception $e) {
Log::error('[error] You must be logged in as Super User to run this script. Please login in to Piwik and refresh this page.');
exit;
}
// the 'start' query param will be supplied by the AJAX requests, so if it's not there, the
// user is viewing the page in the browser.
if (Common::getRequestVar('start', false) === false) {
// output HTML page that runs update via AJAX
// The page below POSTs consecutive [start, end) windows of 100 rows back to
// this script, chaining the next request from each success callback.
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<script type="text/javascript" src="../../libs/jquery/jquery.js"></script>
<script type="text/javascript">
(function ($) {
var count = <?php echo $count; ?>;
var doIteration = function (start) {
if (start >= count) {
return;
}
var end = Math.min(start + 100, count);
$.ajax({
type: 'POST',
url: 'geoipUpdateRows.php',
data: {
start: start,
end: end
},
async: true,
error: function (xhr, status, error) {
$('body')
.append(xhr.responseText)
.append('<div style="color:red"><strong>An error occured!</strong></div>');
},
success: function (response) {
doIteration(end);
$('body').append(response);
var body = $('body')[0];
body.scrollTop = body.scrollHeight;
}
});
};
doIteration(0);
}(jQuery));
</script>
</head>
<body>
</body>
</html>
<?php
exit;
} else {
// AJAX request: process exactly the requested window, clamped to the row count.
$start = Common::getRequestVar('start', 0, 'int');
$end = min($count, Common::getRequestVar('end', $count, 'int'));
$limit = $end - $start;
}
} else // command line
{
// CLI run: process every row, in pages of 1000.
$start = 0;
$end = $count;
$limit = 1000;
}
/**
 * Logs a fatal error and terminates the script.
 *
 * When not running from the command line, an HTTP 500 status is sent so the
 * AJAX caller's error handler fires. The script then exits with a non-zero
 * status so that shell callers (cron, scripts) can detect the failure; the
 * original exited with status 0 even on error.
 *
 * @param string $message Error message passed to Log::error().
 * @return void This function never returns.
 */
function geoipUpdateError($message)
{
    Log::error($message);
    if (!Common::isPhpCliMode()) {
        // Output may already have been sent by the logger, so suppress the
        // "headers already sent" warning as the original did.
        @header('HTTP/1.1 500 Internal Server Error', true, 500);
    }
    exit(1);
}
// Provider selection: try the GeoIP PECL extension first, fall back to the
// GeoIP PHP API, and abort through geoipUpdateError() if neither is usable.
// only display notes if on command line (where start will == 0 for that part of script) or on
// first AJAX call by browser
$displayNotes = $start == 0;
// try getting the pecl location provider
$provider = new Pecl();
if (!$provider->isAvailable()) {
if ($displayNotes) {
Log::info("[note] The GeoIP PECL extension is not installed.");
}
$provider = null;
} else {
// isWorking() returns true on success; any other value is interpolated into
// the log message below as the failure reason.
$workingOrError = $provider->isWorking();
if ($workingOrError !== true) {
if ($displayNotes) {
Log::info("[note] The GeoIP PECL extension is broken: $workingOrError");
}
if (Common::isPhpCliMode()) {
Log::info("[note] Make sure your command line PHP is configured to use the PECL extension.");
}
$provider = null;
}
}
// use php api if pecl extension cannot be used
if (is_null($provider)) {
if ($displayNotes) {
Log::info("[note] Falling back to PHP API. This may become too slow for you. If so, you can read this link on how to install the PECL extension: http://piwik.org/faq/how-to/#faq_164");
}
$provider = new Php();
if (!$provider->isAvailable()) {
if ($displayNotes) {
Log::info("[note] The GeoIP PHP API is not available. This means you do not have a GeoIP location database in your ./misc directory. The database must be named either GeoIP.dat or GeoIPCity.dat based on the type of database it is.");
}
$provider = null;
} else {
$workingOrError = $provider->isWorking();
if ($workingOrError !== true) {
if ($displayNotes) {
Log::info("[note] The GeoIP PHP API is broken: $workingOrError");
}
$provider = null;
}
}
}
// Neither provider is usable: log the error and terminate.
if (is_null($provider)) {
geoipUpdateError("\n[error] There is no location provider that can be used with this script. Only the GeoIP PECL module or the GeoIP PHP API can be used at present. Please install and configure one of these first.");
}
$info = $provider->getInfo();
if ($displayNotes) {
Log::info("[note] Found working provider: {$info['id']}");
}
// perform update
// Maps each log_visit location column to the key under which the provider
// returns the corresponding value.
$logVisitFieldsToUpdate = array('location_country' => LocationProvider::COUNTRY_CODE_KEY,
'location_region' => LocationProvider::REGION_CODE_KEY,
'location_city' => LocationProvider::CITY_NAME_KEY,
'location_latitude' => LocationProvider::LATITUDE_KEY,
'location_longitude' => LocationProvider::LONGITUDE_KEY);
if ($displayNotes) {
Log::info("\n$count rows to process in " . Common::prefixTable('log_visit')
. " and " . Common::prefixTable('log_conversion') . "...");
}
// Push the notes to the client before the potentially long update loop starts.
flush();
// Walk log_visit in pages of $limit rows, geolocate each visit's IP and write
// any changed location columns to both log_visit and log_conversion.
// NOTE(review): the paging query has no ORDER BY, so page contents rely on the
// storage engine returning rows in a stable order -- confirm before reuse.
for (; $start < $end; $start += $limit) {
    $rows = Db::fetchAll("SELECT idvisit, location_ip, " . implode(',', array_keys($logVisitFieldsToUpdate)) . "
FROM " . Common::prefixTable('log_visit') . "
LIMIT $start, $limit");
    if (!count($rows)) {
        continue;
    }
    foreach ($rows as $row) {
        // Collect the location columns that are still empty for this visit.
        // The original tested $fieldsToSet[$field], which is always empty, so
        // the "already has a location" shortcut below could never trigger.
        $fieldsToSet = array();
        foreach ($logVisitFieldsToUpdate as $field => $ignore) {
            if (empty($row[$field])) {
                $fieldsToSet[] = $field;
            }
        }
        // skip if it already has a location
        if (empty($fieldsToSet)) {
            continue;
        }
        $ip = IP::N2P($row['location_ip']);
        $location = $provider->getLocation(array('ip' => $ip));
        // Country codes are compared and stored in lower case.
        if (!empty($location[LocationProvider::COUNTRY_CODE_KEY])) {
            $location[LocationProvider::COUNTRY_CODE_KEY] =
                strtolower($location[LocationProvider::COUNTRY_CODE_KEY]);
        }
        $row['location_country'] = strtolower($row['location_country']);
        // Build the SET clause from the columns whose located value is
        // non-empty and differs from what is currently stored.
        $columnsToSet = array();
        $bind = array();
        foreach ($logVisitFieldsToUpdate as $column => $locationKey) {
            if (!empty($location[$locationKey])
                && $location[$locationKey] != $row[$column]
            ) {
                $columnsToSet[] = $column . ' = ?';
                $bind[] = $location[$locationKey];
            }
        }
        if (empty($columnsToSet)) {
            continue;
        }
        $bind[] = $row['idvisit'];
        // update log_visit
        $sql = "UPDATE " . Common::prefixTable('log_visit') . "
SET " . implode(', ', $columnsToSet) . "
WHERE idvisit = ?";
        Db::query($sql, $bind);
        // update log_conversion
        $sql = "UPDATE " . Common::prefixTable('log_conversion') . "
SET " . implode(', ', $columnsToSet) . "
WHERE idvisit = ?";
        Db::query($sql, $bind);
    }
    Log::info(round($start * 100 / $count) . "% done...");
    flush();
}
// Only the invocation that reaches the end of the table prints the closing notes.
if ($start >= $count) {
    Log::info("100% done!");
    Log::info("");
    Log::info("[note] Now that you've geolocated your old visits, you need to force your reports to be re-processed. See this FAQ entry: http://piwik.org/faq/how-to/#faq_59");
}
echo "This script has been removed, instead use the 'usercountry:attribute' command.\n";
echo "For example, run 'php /path/to/piwik/console usercountry:attribute 2012-01-01,2013-01-01'.\n";
echo "To learn more about the new command, run 'php /path/to/piwik/console help usercountry:attribute'.\n";

View file

@ -1,159 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<code_scheme name="Piwik-codestyle">
<option name="LINE_SEPARATOR" value="&#10;" />
<option name="RIGHT_MARGIN" value="160" />
<PHPCodeStyleSettings>
<option name="ALIGN_KEY_VALUE_PAIRS" value="true" />
<option name="LOWER_CASE_BOOLEAN_CONST" value="true" />
<option name="LOWER_CASE_NULL_CONST" value="true" />
</PHPCodeStyleSettings>
<XML>
<option name="XML_LEGACY_SETTINGS_IMPORTED" value="true" />
</XML>
<codeStyleSettings language="JavaScript">
<option name="ALIGN_MULTILINE_PARAMETERS" value="false" />
<option name="KEEP_SIMPLE_BLOCKS_IN_ONE_LINE" value="true" />
<option name="KEEP_SIMPLE_METHODS_IN_ONE_LINE" value="true" />
</codeStyleSettings>
<codeStyleSettings language="PHP">
<option name="ALIGN_MULTILINE_ARRAY_INITIALIZER_EXPRESSION" value="true" />
<arrangement>
<groups>
<group>
<type>DEPENDENT_METHODS</type>
<order>BREADTH_FIRST</order>
</group>
</groups>
<rules>
<rule>
<match>
<CONST />
</match>
</rule>
<rule>
<match>
<AND>
<FIELD />
<PUBLIC />
<STATIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<FIELD />
<PROTECTED />
<STATIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<FIELD />
<PRIVATE />
<STATIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<FIELD />
<PUBLIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<FIELD />
<PROTECTED />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<FIELD />
<PRIVATE />
</AND>
</match>
</rule>
<rule>
<match>
<CONSTRUCTOR />
</match>
</rule>
<rule>
<match>
<AND>
<METHOD />
<PUBLIC />
<STATIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<METHOD />
<PROTECTED />
<STATIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<METHOD />
<PRIVATE />
<STATIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<METHOD />
<PUBLIC />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<METHOD />
<PROTECTED />
</AND>
</match>
</rule>
<rule>
<match>
<AND>
<METHOD />
<PRIVATE />
</AND>
</match>
</rule>
<rule>
<match>
<TRAIT />
</match>
</rule>
<rule>
<match>
<INTERFACE />
</match>
</rule>
<rule>
<match>
<CLASS />
</match>
</rule>
</rules>
</arrangement>
</codeStyleSettings>
</code_scheme>

View file

@ -1,21 +0,0 @@
Phpstorm has an awesome feature called "Reformat code" which reformats all PHP code to follow a particular selected coding style.
Piwik uses PSR coding standard for php source code. We use a slightly customized PSR style
(because the default PSR style in Phpstorm results in some unwanted changes).
Steps:
* Use latest Phpstorm
* Copy this Piwik_codestyle.xml file in your ~/.WebIde60/config/codestyles/
* If you use Windows or Mac see which path to copy at: http://intellij-support.jetbrains.com/entries/23358108
* To automatically link to the file in Piwik:
`$ ln -s ~/dev/piwik-master/misc/others/phpstorm-codestyles/Piwik_codestyle.xml ~/.WebIde70/config/codestyles/Piwik_codestyle.xml`
* Restart PhpStorm.
* Select this coding style in Settings > Code Style.
Phpstorm can also be configured to apply the style automatically before commit.
You are now writing code that respects Piwik coding standards. Enjoy!
Reference: http://piwik.org/participate/coding-standards/

View file

@ -1,27 +0,0 @@
<?php
// Script that creates 100 websites, then outputs a IMG that records a pageview in each website
// Used initially to test how to handle cookies for this use case (see http://dev.piwik.org/trac/ticket/409)
use Piwik\Common;
use Piwik\FrontController;
use Piwik\Piwik;
use Piwik\Plugins\SitesManager\API;
// Safety guard: remove this line to actually run the script.
exit;
define('PIWIK_INCLUDE_PATH', '../..');
// Load Piwik without dispatching a request, installing its error handler or
// starting a session.
define('PIWIK_ENABLE_DISPATCH', false);
define('PIWIK_ENABLE_ERROR_HANDLER', false);
define('PIWIK_ENABLE_SESSION_START', false);
require_once PIWIK_INCLUDE_PATH . "/index.php";
require_once PIWIK_INCLUDE_PATH . "/core/API/Request.php";
require_once PIWIK_INCLUDE_PATH . "/libs/PiwikTracker/PiwikTracker.php";
FrontController::getInstance()->init();
Piwik::setUserHasSuperUserAccess();
$count = 100;
// Create exactly $count websites; the original "<=" bound created one extra.
for ($i = 0; $i < $count; $i++) {
    $id = API::getInstance()->addSite(Common::getRandomString(), 'http://piwik.org');
    $t = new PiwikTracker($id, 'http://localhost/trunk/piwik.php');
    // Escape the tracking URL: it contains '&' and is placed inside a
    // single-quoted HTML attribute.
    $trackingUrl = htmlspecialchars($t->getUrlTrackPageView('title'), ENT_QUOTES);
    echo $id . " <img width=100 height=10 border=1 src='" . $trackingUrl . "'><br/>";
}

View file

@ -1,154 +0,0 @@
<?php
use Piwik\Common;
use Piwik\Config;
use Piwik\FrontController;
use Piwik\Log;
// Entry point of the log-copy stress test: bootstraps Piwik without
// dispatching, refuses to run outside the CLI, then runs the copy process.
define('PIWIK_INCLUDE_PATH', realpath(dirname(__FILE__) . "/../.."));
define('PIWIK_ENABLE_DISPATCH', false);
define('PIWIK_ENABLE_ERROR_HANDLER', false);
define('PIWIK_ENABLE_SESSION_START', false);
require_once PIWIK_INCLUDE_PATH . "/index.php";
require_once PIWIK_INCLUDE_PATH . "/core/API/Request.php";
require_once PIWIK_INCLUDE_PATH . "/libs/PiwikTracker/PiwikTracker.php";
FrontController::getInstance()->init();
// SECURITY: DO NOT DELETE THIS LINE!
// The script writes directly to the log tables, so it must never be reachable over HTTP.
if (!Common::isPhpCliMode()) {
die("ERROR: Must be executed in CLI");
}
$process = new Piwik_StressTests_CopyLogs;
$process->init();
$process->run();
// Uncomment to remove the rows dated today (see delete() below).
//$process->delete();
/**
 * Stress-test helper that multiplies log data by copying the visits, actions,
 * conversions and purchased items of a fixed date range into the current day.
 */
class Piwik_StressTests_CopyLogs
{
/**
 * Configures logging so that every message is written to the screen at
 * VERBOSE level, regardless of the debug parameter.
 */
function init()
{
$config = Config::getInstance();
$config->log['log_only_when_debug_parameter'] = 0;
$config->log['log_writers'] = array('screen');
$config->log['log_level'] = 'VERBOSE';
}
/**
 * Copies all rows between $startDate and $endDate (hard-coded below) from the
 * four log tables into today's date, then logs before/after row counts.
 */
function run()
{
// Copy all visits in date range into TODAY
$startDate = '2011-08-12';
$endDate = '2011-08-12';
$this->log("Starting...");
$db = \Zend_Registry::get('db');
// Snapshot today's counts so the number of created rows can be reported at the end.
$initial = $this->getVisitsToday();
$this->log(" Visits today so far: " . $initial);
$initialActions = $this->getActionsToday();
$this->log(" Actions today: " . $initialActions);
$initialPurchasedItems = $this->getConversionItemsToday();
$this->log(" Purchased items today: " . $initialPurchasedItems);
$initialConversions = $this->getConversionsToday();
$this->log(" Conversions today: " . $initialConversions);
$this->log(" Now copying visits between '$startDate' and '$endDate'...");
// Visits: first/last action times are each replaced by today's date plus an
// independently randomized time of day.
$sql = "INSERT INTO " . Common::prefixTable('log_visit') . " (`idsite`, `idvisitor`, `visitor_localtime`, `visitor_returning`, `visitor_count_visits`, `visit_first_action_time`, `visit_last_action_time`, `visit_exit_idaction_url`, `visit_exit_idaction_name`, `visit_entry_idaction_url`, `visit_entry_idaction_name`, `visit_total_actions`, `visit_total_time`, `visit_goal_converted`, `visit_goal_buyer`, `referer_type`, `referer_name`, `referer_url`, `referer_keyword`, `config_id`, `config_os`, `config_browser_name`, `config_browser_version`, `config_resolution`, `config_pdf`, `config_flash`, `config_java`, `config_director`, `config_quicktime`, `config_realplayer`, `config_windowsmedia`, `config_gears`, `config_silverlight`, `config_cookie`, `location_ip`, `location_browser_lang`, `location_country`, `location_provider`, `custom_var_k1`, `custom_var_v1`, `custom_var_k2`, `custom_var_v2`, `custom_var_k3`, `custom_var_v3`, `custom_var_k4`, `custom_var_v4`, `custom_var_k5`, `custom_var_v5`, `visitor_days_since_last`, `visitor_days_since_order`, `visitor_days_since_first`)
SELECT `idsite`, `idvisitor`, `visitor_localtime`, `visitor_returning`, `visitor_count_visits`, CONCAT(CURRENT_DATE() , \" \", FLOOR(RAND()*24) , \":\",FLOOR(RAND()*60),\":\",FLOOR(RAND()*60)), CONCAT(CURRENT_DATE() , \" \", FLOOR(RAND()*24) , \":\",FLOOR(RAND()*60),\":\",FLOOR(RAND()*60)), `visit_exit_idaction_url`, `visit_exit_idaction_name`, `visit_entry_idaction_url`, `visit_entry_idaction_name`, `visit_total_actions`, `visit_total_time`, `visit_goal_converted`, `visit_goal_buyer`, `referer_type`, `referer_name`, `referer_url`, `referer_keyword`, `config_id`, `config_os`, `config_browser_name`, `config_browser_version`, `config_resolution`, `config_pdf`, `config_flash`, `config_java`, `config_director`, `config_quicktime`, `config_realplayer`, `config_windowsmedia`, `config_gears`, `config_silverlight`, `config_cookie`, `location_ip`, `location_browser_lang`, `location_country`, `location_provider`, `custom_var_k1`, `custom_var_v1`, `custom_var_k2`, `custom_var_v2`, `custom_var_k3`, `custom_var_v3`, `custom_var_k4`, `custom_var_v4`, `custom_var_k5`, `custom_var_v5`, `visitor_days_since_last`, `visitor_days_since_order`, `visitor_days_since_first`
FROM `" . Common::prefixTable('log_visit') . "`
WHERE idsite >= 1 AND date(visit_last_action_time) between '$startDate' and '$endDate' ;";
$result = $db->query($sql);
$this->log(" Copying actions...");
// Actions: copied with the original idvisit and a randomized server_time dated today.
$sql = "INSERT INTO " . Common::prefixTable('log_link_visit_action') . " (`idsite`, `idvisitor`, `server_time`, `idvisit`, `idaction_url`, `idaction_url_ref`, `idaction_name`, `idaction_name_ref`, `time_spent_ref_action`, `custom_var_k1`, `custom_var_v1`, `custom_var_k2`, `custom_var_v2`, `custom_var_k3`, `custom_var_v3`, `custom_var_k4`, `custom_var_v4`, `custom_var_k5`, `custom_var_v5`)
SELECT `idsite`, `idvisitor`, CONCAT(CURRENT_DATE() , \" \", FLOOR(RAND()*24) , \":\",FLOOR(RAND()*60),\":\",FLOOR(RAND()*60)), `idvisit`, `idaction_url`, `idaction_url_ref`, `idaction_name`, `idaction_name_ref`, `time_spent_ref_action`, `custom_var_k1`, `custom_var_v1`, `custom_var_k2`, `custom_var_v2`, `custom_var_k3`, `custom_var_v3`, `custom_var_k4`, `custom_var_v4`, `custom_var_k5`, `custom_var_v5`
FROM `" . Common::prefixTable('log_link_visit_action') . "`
WHERE idsite >= 1 AND date(server_time) between '$startDate' and '$endDate'
;"; // LIMIT 1000000
$result = $db->query($sql);
$this->log(" Copying conversions...");
// Conversions: INSERT IGNORE is used; `buster` is scaled by a random factor
// and `idorder` gets a random 9-character suffix.
$sql = "INSERT IGNORE INTO `" . Common::prefixTable('log_conversion') . "` (`idvisit`, `idsite`, `visitor_days_since_first`, `visitor_days_since_order`, `visitor_count_visits`, `idvisitor`, `server_time`, `idaction_url`, `idlink_va`, `referer_visit_server_date`, `referer_type`, `referer_name`, `referer_keyword`, `visitor_returning`, `location_country`, `url`, `idgoal`, `revenue`, `buster`, `idorder`, `custom_var_k1`, `custom_var_v1`, `custom_var_k2`, `custom_var_v2`, `custom_var_k3`, `custom_var_v3`, `custom_var_k4`, `custom_var_v4`, `custom_var_k5`, `custom_var_v5`, `items`, `revenue_subtotal`, `revenue_tax`, `revenue_shipping`, `revenue_discount`)
SELECT `idvisit`, `idsite`, `visitor_days_since_first`, `visitor_days_since_order`, `visitor_count_visits`, `idvisitor`, CONCAT(CURRENT_DATE() , \" \", FLOOR(RAND()*24) , \":\",FLOOR(RAND()*60),\":\",FLOOR(RAND()*60)), `idaction_url`, `idlink_va`, `referer_visit_server_date`, `referer_type`, `referer_name`, `referer_keyword`, `visitor_returning`, `location_country`, `url`, `idgoal`, `revenue`, FLOOR(`buster` * RAND()), CONCAT(`idorder`,SUBSTRING(MD5(RAND()) FROM 1 FOR 9)) , `custom_var_k1`, `custom_var_v1`, `custom_var_k2`, `custom_var_v2`, `custom_var_k3`, `custom_var_v3`, `custom_var_k4`, `custom_var_v4`, `custom_var_k5`, `custom_var_v5`, `items`, `revenue_subtotal`, `revenue_tax`, `revenue_shipping`, `revenue_discount`
FROM `" . Common::prefixTable('log_conversion') . "`
WHERE idsite >= 1 AND date(server_time) between '$startDate' and '$endDate' ;";
$result = $db->query($sql);
$this->log(" Copying purchased items...");
// Purchased items: the date part of server_time becomes today while the
// original time of day is kept; `idorder` gets a random 9-character suffix.
$sql = "INSERT INTO `" . Common::prefixTable('log_conversion_item') . "` (`idsite`, `idvisitor`, `server_time`, `idvisit`, `idorder`, `idaction_sku`, `idaction_name`, `idaction_category`, `price`, `quantity`, `deleted`)
SELECT `idsite`, `idvisitor`, CONCAT(CURRENT_DATE() , \" \", TIME(`server_time`)), `idvisit`, CONCAT(`idorder`,SUBSTRING(MD5(RAND()) FROM 1 FOR 9)) , `idaction_sku`, `idaction_name`, `idaction_category`, `price`, `quantity`, `deleted`
FROM `" . Common::prefixTable('log_conversion_item') . "`
WHERE idsite >= 1 AND date(server_time) between '$startDate' and '$endDate' ;";
$result = $db->query($sql);
// Re-count and report the difference against the initial snapshot.
$now = $this->getVisitsToday();
$actions = $this->getActionsToday();
$purchasedItems = $this->getConversionItemsToday();
$conversions = $this->getConversionsToday();
$this->log(" -------------------------------------");
$this->log(" Today visits after import: " . $now);
$this->log(" Actions: " . $actions);
$this->log(" Purchased items: " . $purchasedItems);
$this->log(" Conversions: " . $conversions);
$this->log(" - New visits created: " . ($now - $initial));
$this->log(" - Actions created: " . ($actions - $initialActions));
$this->log(" - New conversions created: " . ($conversions - $initialConversions));
$this->log(" - New purchased items created: " . ($purchasedItems - $initialPurchasedItems));
$this->log("done");
}
/**
 * Deletes every row dated today from the four log tables and then asks
 * \Piwik\Db to optimize those tables. This removes ALL of today's rows, not
 * only the ones created by run().
 */
function delete()
{
$this->log("Deleting logs for today...");
$db = \Zend_Registry::get('db');
$sql = "DELETE FROM " . Common::prefixTable('log_visit') . "
WHERE date(visit_last_action_time) = CURRENT_DATE();";
$db->query($sql);
foreach (array('log_link_visit_action', 'log_conversion', 'log_conversion_item') as $table) {
$sql = "DELETE FROM " . Common::prefixTable($table) . "
WHERE date(server_time) = CURRENT_DATE();";
$db->query($sql);
}
$tablesToOptimize = array(
Common::prefixTable('log_link_visit_action'),
Common::prefixTable('log_conversion'),
Common::prefixTable('log_conversion_item'),
Common::prefixTable('log_visit')
);
\Piwik\Db::optimizeTables($tablesToOptimize);
$this->log("done");
}
/**
 * Writes a message through Log::info().
 *
 * @param string $m Message to log.
 */
function log($m)
{
Log::info($m);
}
/**
 * @return mixed Number of log_visit rows whose last action is dated today,
 *               as returned by fetchOne().
 */
function getVisitsToday()
{
$sql = "SELECT count(*) FROM `" . Common::prefixTable('log_visit') . "` WHERE idsite >= 1 AND DATE(`visit_last_action_time`) = CURRENT_DATE;";
return \Zend_Registry::get('db')->fetchOne($sql);
}
/**
 * Counts today's rows (by `server_time`) in the given log table.
 *
 * @param string $table Unprefixed table name; defaults to 'log_conversion_item'.
 * @return mixed Row count as returned by fetchOne().
 */
function getConversionItemsToday($table = 'log_conversion_item')
{
$sql = "SELECT count(*) FROM `" . Common::prefixTable($table) . "` WHERE idsite >= 1 AND DATE(`server_time`) = CURRENT_DATE;";
return \Zend_Registry::get('db')->fetchOne($sql);
}
/**
 * @return mixed Number of log_conversion rows dated today; reuses
 *               getConversionItemsToday() with a different table name.
 */
function getConversionsToday()
{
return $this->getConversionItemsToday($table = "log_conversion");
}
/**
 * @return mixed Number of log_link_visit_action rows dated today, as
 *               returned by fetchOne().
 */
function getActionsToday()
{
$sql = "SELECT count(*) FROM `" . Common::prefixTable('log_link_visit_action') . "` WHERE idsite >= 1 AND DATE(`server_time`) = CURRENT_DATE;";
return \Zend_Registry::get('db')->fetchOne($sql);
}
}

View file

@ -12,11 +12,11 @@ $trackingURL = Piwik_getUrlTrackPageView($idSite = 16, $customTitle = 'This titl
<script type="text/javascript">
var _paq = _paq || [];
(function() {
var u=(("https:" == document.location.protocol) ? "https" : "http") + "://localhost/piwik-master/";
var u="//localhost/piwik-master/";
_paq.push(["setTrackerUrl", u+"piwik.php"]);
_paq.push(["setSiteId", "16"]);
var d=document, g=d.createElement("script"), s=d.getElementsByTagName("script")[0]; g.type="text/javascript";
g.defer=true; g.async=true; g.src=u+"js/piwik.js"; s.parentNode.insertBefore(g,s);
var d=document, g=d.createElement("script"), s=d.getElementsByTagName("script")[0];
g.type="text/javascript"; g.async=true; g.defer=true; g.src=u+"piwik.js"; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Piwik Code -->

View file

@ -1,11 +1,14 @@
<?php
exit; // Remove this line before using the script
// How to remove the piwik/ directory if it does not work in FTP?
// 1) Download and upload this file to your webserver
// 2) Put this file in the folder that contains the piwik/ directory (above the piwik/ directory)
// 2) Remove the 2nd line (the "exit;")
// 3) Put this file in the folder that contains the piwik/ directory (above the piwik/ directory)
// For example if the piwik/ folder is at http://your-site/piwik/ you put the file in http://your-site/uninstall-delete-piwik-directory.php
// 3) Go with your browser to http://your-site/uninstall-delete-piwik-directory.php
// 4) The folder http://your-site/piwik/ should now be deleted!
// We hope you enjoyed Piwik. If you have any feedback why you stopped using Piwik,
// 4) Go with your browser to http://your-site/uninstall-delete-piwik-directory.php
// 5) The folder http://your-site/piwik/ should now be deleted!
// We hope you enjoyed Piwik. If you have any feedback why you stopped using Piwik,
// please let us know at hello@piwik.org - we are interested by your experience
function unlinkRecursive($dir)
{

View file

@ -1,55 +1,3 @@
## Piwik Proxy Hide URL
This script lets you track statistics using Piwik without revealing the
Piwik Server URL. This is useful for users who track multiple websites
on the same Piwik server, but don't want to show the Piwik server URL in
the source code of all tracked websites.
# Piwik Proxy Hide URL
### Requirements
To run this properly you will need
* Piwik server latest version
* One or several website(s) to track with this Piwik server, for example http://trackedsite.com
* The website to track must run on a server with PHP5 support
* In your php.ini you must check that the following is set: `allow_url_fopen = On`
### How to track trackedsite.com in your Piwik without revealing the Piwik server URL?
1. In your Piwik server, login as Super user
2. create a user, set the login for example: "UserTrackingAPI"
3. Assign this user "admin" permission on all websites you wish to track without showing the Piwik URL
4. Copy the "token_auth" for this user, and paste it below in this file, in `$TOKEN_AUTH = "xyz"`
5. In this file, below this help text, edit the `$PIWIK_URL` variable and replace http://your-piwik-domain.example.org/piwik/ with the URL of your Piwik server.
6. Upload this modified piwik.php file in the website root directory, for example at: http://trackedsite.com/piwik.php
This file (http://trackedsite.com/piwik.php) will be called by the Piwik Javascript,
instead of calling directly the (secret) Piwik Server URL (http://your-piwik-domain.example.org/piwik/).
7. You now need to add the modified Piwik Javascript Code to the footer of your pages at http://trackedsite.com/
Go to Piwik > Settings > Websites > Show Javascript Tracking Code.
Copy the Javascript snippet. Then, edit this code and change the last lines to the following:
```
[...]
(function() {
var u=(("https:" == document.location.protocol) ? "https" : "http") + "://trackedsite.com/";
_paq.push(["setTrackerUrl", u+"piwik.php"]);
_paq.push(["setSiteId", "trackedsite-id"]);
var d=document, g=d.createElement("script"), s=d.getElementsByTagName("script")[0]; g.type="text/javascript";
g.defer=true; g.async=true; g.src=u+"piwik.php"; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Piwik Code -->
```
What's changed in this code snippet compared to the normal Piwik code?
* the (secret) Piwik URL is now replaced by your website URL
* the "piwik.js" becomes "piwik.php" because this piwik.php proxy script will also display and proxy the Javascript file
* the `<noscript>` part of the code at the end is removed,
since it is not currently used by Piwik, and it contains the (secret) Piwik URL which you want to hide.
* make sure to replace trackedsite-id with your idsite again.
8. Paste the modified Piwik Javascript code in your website "trackedsite.com" pages you wish to track.
This modified Javascript Code will then track visits/pages/conversions by calling trackedsite.com/piwik.php
which will then automatically call your (hidden) Piwik Server URL.
9. Done!
At this stage, trackedsite.com should be tracked by your Piwik without showing the Piwik server URL.
Repeat the steps 6, 7 and 8 for each website you wish to track in Piwik.
The proxy script has been moved to [piwik/tracker-proxy](https://github.com/piwik/tracker-proxy).

View file

@ -1,73 +0,0 @@
<?php
/**
* Piwik - Open source web analytics
* Piwik Proxy Hide URL
*
* @link http://piwik.org/faq/how-to/#faq_132
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
// -----
// Important: read the instructions in README.md or at:
// https://github.com/piwik/piwik/tree/master/misc/proxy-hide-piwik-url#piwik-proxy-hide-url
// -----
// Edit the line below, and replace http://your-piwik-domain.example.org/piwik/
// with your Piwik URL ending with a slash.
// This URL will never be revealed to visitors or search engines.
$PIWIK_URL = 'http://your-piwik-domain.example.org/piwik/';
// Edit the line below, and replace xyz by the token_auth for the user "UserTrackingAPI"
// which you created when you followed instructions above.
$TOKEN_AUTH = 'xyz';
// Maximum time, in seconds, to wait for the Piwik server to respond
// (applies both to fetching piwik.js and to the tracking request).
$timeout = 5;
// DO NOT MODIFY BELOW
// ---------------------------
// 1) PIWIK.JS PROXY: No _GET parameter, we serve the JS file
if (empty($_GET)) {
    $modifiedSince = false;
    if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
        $modifiedSince = $_SERVER['HTTP_IF_MODIFIED_SINCE'];
        // strip any trailing data appended to header
        if (false !== ($semicolon = strpos($modifiedSince, ';'))) {
            $modifiedSince = substr($modifiedSince, 0, $semicolon);
        }
        // Always convert to a timestamp. The original only did so when a ';'
        // was present, leaving a date string to be compared with an integer.
        $modifiedSince = strtotime($modifiedSince);
    }
    // Re-download the piwik.js once a day maximum
    $lastModified = time() - 86400;
    // set HTTP response headers
    header('Vary: Accept-Encoding');
    // Returns 304 if the client's copy is less than a day old. The original
    // comparison was inverted and answered 304 only for copies OLDER than a day.
    if (!empty($modifiedSince) && $modifiedSince > $lastModified) {
        header(sprintf("%s 304 Not Modified", $_SERVER['SERVER_PROTOCOL']));
    } else {
        // Fetch first so that success headers are only sent when there is a body.
        $jsContext = stream_context_create(array('http' => array('timeout' => $timeout)));
        $piwikJs = file_get_contents($PIWIK_URL . 'piwik.js', false, $jsContext);
        if ($piwikJs) {
            header('Last-Modified: ' . gmdate('D, d M Y H:i:s') . ' GMT');
            @header('Content-Type: application/javascript; charset=UTF-8');
            echo $piwikJs;
        } else {
            // Was "<protocol>505 Internal server error": missing space and wrong code.
            header($_SERVER['SERVER_PROTOCOL'] . ' 500 Internal Server Error');
        }
    }
    exit;
}
// 2) PIWIK.PHP PROXY: GET parameters found, this is a tracking request, we redirect it to Piwik
// Parameters set by the proxy itself must not be overridable by the visitor,
// otherwise anyone could spoof the recorded IP using the proxy's token_auth.
$forwardedParams = $_GET;
unset($forwardedParams['cip'], $forwardedParams['token_auth']);
$remoteAddress = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
$url = sprintf(
    "%spiwik.php?cip=%s&token_auth=%s",
    $PIWIK_URL,
    urlencode($remoteAddress),
    urlencode($TOKEN_AUTH)
);
// http_build_query() encodes keys as well as values and copes with array
// parameters, which urlencode() cannot handle.
$forwardedQuery = http_build_query($forwardedParams, '', '&');
if ($forwardedQuery !== '') {
    $url .= '&' . $forwardedQuery;
}
header("Content-Type: image/gif");
// Strip line breaks and tabs from forwarded header values to prevent header injection.
$lineBreaks = array("\n", "\t", "\r");
$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
$acceptLanguage = isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : '';
$stream_options = array('http' => array(
    'user_agent' => str_replace($lineBreaks, "", $userAgent),
    'header'     => sprintf("Accept-Language: %s\r\n", str_replace($lineBreaks, "", $acceptLanguage)),
    'timeout'    => $timeout
));
$ctx = stream_context_create($stream_options);
echo file_get_contents($url, false, $ctx);

View file

@ -1,33 +0,0 @@
# Deny direct HTTP access to PHP, include and template source files.
# The original mixed Apache 2.2 ("Deny from all") and 2.4 ("Require all denied")
# directives inside every branch and tested "mod_access_compat", which is
# neither a module file name nor a module identifier and therefore never matched.
<Files ~ "\.(php|php4|php5|inc|tpl|in|twig)$">
    # Apache 2.4+: mod_authz_core provides the "Require all ..." syntax.
    <IfModule mod_authz_core.c>
        Require all denied
    </IfModule>
    # Apache 2.2 and older: host-based access control directives.
    <IfModule !mod_authz_core.c>
        Deny from all
    </IfModule>
</Files>
# Allow static assets and *.test.php files. This section comes after the
# deny rule for *.php so that it takes precedence for *.test.php.
<Files ~ "\.(test\.php|gif|ico|jpg|png|svg|js|css|swf)$">
    # Apache 2.4+: mod_authz_core provides the "Require all ..." syntax.
    <IfModule mod_authz_core.c>
        Require all granted
        # "Satisfy" only exists on 2.4 when mod_access_compat is loaded.
        <IfModule mod_access_compat.c>
            Satisfy any
        </IfModule>
    </IfModule>
    # Apache 2.2 and older: "Satisfy" is a core directive there.
    <IfModule !mod_authz_core.c>
        Allow from all
        Satisfy any
    </IfModule>
</Files>