#!/usr/bin/python
# -*- coding: utf-8 -*-
#
#--------------------------------------------------------------------------------
#opera_blocklist.py v0.1, Copyright Bjoern Olausson
#--------------------------------------------------------------------------------
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#To view the license visit
#http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#or write to
#Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#--------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
#
#http://pgl.yoyo.org/adservers/ holds a regularly updated list of hostnames/IPs which
#provide advertising services such as banners. Many banners are just annoying because
#they are animated. To get rid of many of these, feed opera with this list.
#To stay up to date, just run this script weekly via cron.
#
#An alternative would be to redirect the advertising hostnames to localhost by adding
#them to /etc/hosts, which would affect the whole system. A ready-to-use hosts file and update script
#can be found here: http://hostsfile.mine.nu/ (http://hostsfile.mine.nu/downloads/updatehosts.sh.txt)
#
import os, urllib, errno, sys, shutil, re

#~~~~~~~~~~~~~~ CONFIGURE VARIABLES BELOW TO FIT YOUR SYSTEM ~~~~~~~~~~~~~~

#Path to Opera urlfilter.ini
URLFILTER = "/home/blub/.opera/urlfilter.ini"
#URL to the blocklist in plaintext operafilter format
BLOCKLIST_URL = "http://pgl.yoyo.org/as/serverlist.php?hostformat=operafilter&showintro=0&mimetype=plaintext"
#List of URLs, or patterns matching several URLs, that you want to remove from the blocklist (for whatever reason)
#KILL = ['URL','adserver','banner',...]
KILL = []

#~~~~~~~~~~~~~~ NOTHING TO CONFIGURE BELOW THIS LINE ~~~~~~~~~~~~~~

#Derive the working file names by appending a suffix. Appending (instead of
#replacing ".ini") guarantees that both names differ from URLFILTER even when
#the configured path does not contain ".ini", and leaves directory names that
#happen to contain ".ini" untouched.
URLFILTER_NEW = URLFILTER + ".new"
URLFILTER_BACKUP = URLFILTER + ".bac"

#Work from the directory that holds urlfilter.ini. A bare file name has an
#empty directory part, which os.chdir() would reject, so stay where we are.
INIPATH = os.path.split(URLFILTER)[0]
if INIPATH:
    os.chdir(INIPATH)

#Generator returning the lines that follow a section marker such as "[exclude]"
def linefilter(content, mark):
    """Yield the lines of *content* that come after the first *mark* line.

    content -- iterable of text lines (an open file or a list of strings)
    mark    -- section marker, compared against each line with surrounding
               whitespace stripped (e.g. "[exclude]")

    Nothing is yielded until a line equal to *mark* has been seen. The marker
    line itself is never yielded, not even when it occurs again later on.
    Yielded lines have leading/trailing newline characters removed.
    """
    started = False
    for line in content:
        if line.strip() == mark:
            # Marker lines only switch the output on; they are never emitted.
            started = True
        elif started:
            yield line.strip("\n")

#Download the blocklist first, so that a failed download does not leave a
#half-written urlfilter.ini.new behind
try:
    response = urllib.urlopen(BLOCKLIST_URL)
    try:
        BLOCKLIST_CONT = list(response)
    finally:
        response.close()
except IOError:
    raise Exception("Failed to download the Blocklist")

#Check if the downloaded content is what we need (search for the "operafilter" keyword)
if "operafilter" not in "".join(BLOCKLIST_CONT):
    raise Exception("Failed to download a correct Blocklist")

#Read the current urlfilter.ini once; the handle is closed as soon as we are done
with open(URLFILTER, "r") as urlfilter_file:
    URLFILTER_CONT = urlfilter_file.readlines()

#The header is every line up to and including the first [exclude] marker
HEADER = []
HAS_EXCLUDE = False
for headline in URLFILTER_CONT:
    HEADER.append(headline)
    if headline.strip() == "[exclude]":
        HAS_EXCLUDE = True
        break

#Without an [exclude] line the merged entries would simply be appended to
#whatever section happens to be last in the file, so add the marker ourselves
if not HAS_EXCLUDE:
    if HEADER and not HEADER[-1].endswith("\n"):
        HEADER[-1] += "\n"
    HEADER.append("[exclude]\n")

#Get all lines after "[exclude]" from the original Opera urlfilter.ini file and the downloaded blocklist
OPERA_EXCLUDELIST = list(linefilter(URLFILTER_CONT, "[exclude]"))
BLOCK_EXCLUDELIST = list(linefilter(BLOCKLIST_CONT, "[exclude]"))

#Merge the two lists, remove duplicate entries and sort the result
UNIQUE_EXCLUDE = sorted(set(BLOCK_EXCLUDELIST + OPERA_EXCLUDELIST))

#Split off every entry that matches at least one KILL pattern. Each entry is
#looked at exactly once, so an entry matching several patterns cannot be
#queued for removal twice (which used to make list.remove() raise ValueError).
KILL_PATTERNS = [re.compile(pattern) for pattern in KILL]
KILL_ITEMS = []
KEPT_ITEMS = []
for entry in UNIQUE_EXCLUDE:
    if any(pattern.search(entry) for pattern in KILL_PATTERNS):
        KILL_ITEMS.append(entry)
    else:
        KEPT_ITEMS.append(entry)
UNIQUE_EXCLUDE = KEPT_ITEMS

#Write the header followed by the merged list to urlfilter.ini.new
with open(URLFILTER_NEW, "w") as URLFILTERNEW:
    URLFILTERNEW.writelines(HEADER)
    URLFILTERNEW.writelines(line + "\n" for line in UNIQUE_EXCLUDE)

#Back up the old Opera urlfilter.ini file to urlfilter.ini.bac
shutil.copy(URLFILTER, URLFILTER_BACKUP)

#Overwrite the old urlfilter.ini with the new urlfilter.ini.new. The target is
#given by its configured path so this step does not depend on the current
#working directory.
os.rename(URLFILTER_NEW, URLFILTER)
