====== Requêtes SQL sur les journaux de squid ======
Avec la contrib [[http://wiki.contribs.org/WebFiltering|WebFiltering]], tous les journaux de squid sont inscrit dans une base de données MySQL. Cette page recense quelques requêtes utiles pour l'analyse de ces données
* Les 60 noms de domaines les plus visités sur les 3 derniers mois
SELECT DOMAIN, COUNT( DOMAIN ) AS occurances
FROM access_log
WHERE date_day > DATE_SUB( CURDATE( ) , INTERVAL 2 MONTH )
AND DOMAIN NOT LIKE '192.168.%'
GROUP BY DOMAIN
ORDER BY occurances DESC
LIMIT 60;
* La même chose, en essayant d'enlever des résultats les serveurs de pub et autres traceurs les plus courants
SELECT DOMAIN, COUNT( DOMAIN ) AS occurances
FROM access_log
WHERE date_day > DATE_SUB( CURDATE( ) , INTERVAL 2 MONTH )
AND DOMAIN NOT LIKE '192.168.%'
AND DOMAIN NOT LIKE '%adnxs.com'
AND DOMAIN NOT LIKE '%google-analytics.com'
AND DOMAIN NOT LIKE '%cedexis.com'
AND DOMAIN NOT LIKE '%cedexis-radar.net'
AND DOMAIN NOT LIKE '%xiti.com'
AND DOMAIN NOT LIKE 'ads.%'
AND DOMAIN NOT LIKE '%adnext.fr'
AND DOMAIN NOT LIKE '%.addthis.com'
AND DOMAIN NOT LIKE '%.googleadservices.com'
AND DOMAIN NOT LIKE '%.scorecardresearch.com'
AND DOMAIN NOT LIKE '%.estat.com'
AND DOMAIN NOT LIKE '%.doubleclick.net'
AND DOMAIN NOT LIKE 'pubs.lemonde.fr'
AND DOMAIN NOT LIKE '%.quantserve.com'
AND DOMAIN NOT LIKE '%.pubdirecte.com'
AND DOMAIN NOT LIKE '%.ligatus.com'
AND DOMAIN NOT LIKE '%.ezakus.net'
AND DOMAIN NOT LIKE 'adserver.adtech.de'
AND DOMAIN NOT LIKE '%.yieldmanager.com'
AND DOMAIN NOT LIKE 'stats.wordpress.com'
AND DOMAIN NOT LIKE '%.smartadserver.com'
GROUP BY DOMAIN
ORDER BY occurances DESC
LIMIT 60
* Les 30 plus gros consommateurs de web (en bande passante), sur les 3 derniers mois
SELECT client_ip AS 'Adresse IP', username AS Utilisateur, SUM( reply_size / ( 1024 *1024 ) ) AS Volume
FROM access_log where client_ip!='127.0.0.1'
AND date_day > DATE_SUB( CURDATE( ) , INTERVAL 2 MONTH )
GROUP BY client_ip
ORDER BY Volume DESC
LIMIT 30;
*