Last week, I had to monitor one of our websites to make sure that N divs were correctly displayed on the page.
I had to be alerted every time the number of divs changed on the page.
I made a little script based on this one
Basically, it stores the data in a pickle file and sends an email every time the number of those specific divs changes on the page.
#!/usr/bin/env python
# sample usage: checksites.py yoursite.com othersite.org
import pickle, os, sys, logging
import string
import email.utils
from httplib import HTTPConnection, socket
from smtplib import SMTP
from email.mime.text import MIMEText
# Proxy setting placeholder; nothing in the visible code reads it.
PROXY = ''

#===== Headers =================================================================
# Header sets passed to outgoing HTTP requests.
# Alternative 'Accept' value for headers_get: 'text/plain, text/html'

# Sent with every GET request issued by get_response().
# NOTE(review): the User-Agent value has no closing parenthesis; it is kept
# verbatim here because it is runtime data.
headers_get = {
    'Accept-Encoding': 'gzip, deflate',
    'Accept': '*/*',
    'Accept-Language': 'en-au',
    'Connection': 'Keep-Alive',
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727',
}

# Header set for form-encoded POST requests.
headers_post = {
    'Content-type': 'application/x-www-form-urlencoded',
    'Accept': 'text/plain',
}
def email_alert(url, status):
    """Send an alert e-mail reporting a status change for ``url``.

    Args:
        url: The monitored URL; included in the message body.
        status: Human-readable status text; included in the message body
            and appended to the subject line.

    Raises:
        Any exception raised by ``smtplib`` while connecting or sending is
        propagated to the caller; the connection is closed in either case.
    """
    print("sending email...")
    fromaddr = 'from@mail.com'
    toaddrs = 'to@mail.com'

    # Build the whole message before connecting so that a failure here
    # cannot leave an open SMTP connection behind.
    msg = MIMEText('alert for %s : \r\n %s' % (url, status))
    msg['To'] = email.utils.formataddr(('Recipient', toaddrs))
    msg['From'] = email.utils.formataddr(('Author', fromaddr))
    msg['Subject'] = 'CHANGEMENT CACHE ADSERVER: ' + status

    # NOTE(review): 'smtp@mail.com' looks like a placeholder rather than a
    # real host name -- replace it with the actual SMTP relay.
    server = SMTP('smtp@mail.com')
    try:
        # If the relay requires encryption or authentication, call
        # server.starttls() and server.login('you', 'password') here.
        server.sendmail(fromaddr, [toaddrs], msg.as_string())
    finally:
        # Always release the socket, even when sendmail() raises.
        server.close()
    print("email sent")
def count_boite_element(data):
    """Count the "boite" elements in a page.

    Args:
        data: Page content as text or as raw bytes.

    Returns:
        The number of non-overlapping occurrences of
        ``<div class="element_boite">`` in ``data``.
    """
    # On interpreters where bytes and str are distinct types, a raw response
    # body must be decoded before it can be searched with a text marker.
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    # Use the string method: the module-level string.count() helper is
    # deprecated and no longer exists in newer Python versions.
    return data.count('<div class="element_boite">')
def get_site_status(response):
    """Describe a response as ``'up (<code>)'`` or ``'down (<code>)'``.

    A response counts as up when its ``status`` attribute is 200 or 302.
    Anything else -- including an object without a ``status`` attribute,
    such as ``None`` -- is printed and reported as down; the code shown is 0
    when no status could be read.
    """
    code = 0
    try:
        code = response.status
        is_up = code in (200, 302)
    except AttributeError:
        # No usable response object: keep the default code of 0.
        is_up = False

    if is_up:
        return 'up (%d)' % code

    print(response)
    return 'down (%d)' % code
def get_response(url):
    """Issue a GET request for ``url`` and return the response object.

    ``url`` is split at its first ``/``: the part before it is used as the
    host and the remainder (including the slash) as the request path. A URL
    without any ``/`` is requested with an empty path.

    Args:
        url: Host with optional path, e.g. ``yoursite.com/some/page``.

    Returns:
        The response object from the connection, or ``None`` when a socket
        error occurs.

    Raises:
        SystemExit: with exit status 1 when any other error occurs; the
            offending URL is logged first.
    """
    slash_pos = url.find('/')
    if slash_pos >= 0:
        print(url)
        base_url = url[:slash_pos]
        path = url[slash_pos:]
    else:
        base_url = url
        path = ""
    print("init conn sur " + base_url + " => " + path)
    try:
        conn = HTTPConnection(base_url)
        conn.request('GET', path, None, headers_get)
        print("conn done")
        return conn.getresponse()
    except socket.error:
        # Host unreachable / connection refused: the caller treats None as
        # a "down" site.
        return None
    except Exception:
        # Catch Exception rather than everything so that Ctrl-C and
        # SystemExit are not misreported as a bad URL. The %s placeholder is
        # required: without it the extra argument breaks log formatting.
        logging.error('Bad URL: %s', url)
        sys.exit(1)
def get_headers(url):
    """Return the response headers obtained by requesting ``url``.

    Falls back to the string ``'Headers unavailable'`` when the response
    object does not provide ``getheaders`` (for example when it is ``None``).
    """
    reply = get_response(url)
    try:
        fetch_headers = reply.getheaders
        return fetch_headers()
    except AttributeError:
        return 'Headers unavailable'
def compare_site_status(prev_results):
    """Build a checker that reports changes relative to ``prev_results``.

    Args:
        prev_results: Dict mapping each URL to a dict with the keys
            ``'status'`` and ``'boite_nb'`` recorded on a previous check.
            It is updated in place by the returned checker.

    Returns:
        A one-argument function taking a URL. It checks the site, sends an
        alert when the result differs from the recorded one, and stores the
        new result in ``prev_results``. The function itself returns ``None``.
    """
    def is_status_changed(url):
        """Check ``url`` once, alert on change, and record the new result."""
        response = get_response(url)
        status = get_site_status(response)
        if 'down' not in status:
            # Only a reachable site has a body worth counting elements in.
            boite_nb = count_boite_element(response.read())
        else:
            boite_nb = 0
        friendly_status = '%s is %s with %s boite element' % (url, status, boite_nb)
        # Function-call form of print, matching the rest of the file.
        print(friendly_status)
        print(prev_results)
        # A URL seen for the first time has nothing to compare against, so
        # it never triggers an alert.
        if url in prev_results:
            previous = prev_results[url]
            if previous['status'] != status or previous['boite_nb'] != boite_nb:
                logging.warning(friendly_status)
                # Email status messages
                email_alert(url, friendly_status)
        prev_results[url] = {'status': status, 'boite_nb': boite_nb}
    return is_status_changed
def is_internet_reachable():
    """Return True if at least one well-known site answers as up.

    Google is tried first; Yahoo is only contacted when Google is reported
    as down. False is returned only when both are down.
    """
    for probe_host in ('www.google.com', 'www.yahoo.com'):
        probe_status = get_site_status(get_response(probe_host))
        if 'down' not in probe_status:
            return True
    return False
def load_old_results(file_path):
    """Load the results pickled by a previous run.

    Args:
        file_path: Path of the pickle file written by ``store_results``.

    Returns:
        The unpickled object, or an empty dict when the file does not exist
        or is empty/truncated/not a valid pickle (a warning is logged in the
        latter case so that the run can continue with a fresh state).
    """
    if not os.path.isfile(file_path):
        return {}
    # NOTE(security): unpickling can execute arbitrary code; this file must
    # only ever be written by this script, never taken from an untrusted
    # source.
    try:
        # The context manager closes the file even if unpickling fails.
        with open(file_path, 'rb') as picklefile:
            return pickle.load(picklefile)
    except (EOFError, pickle.UnpicklingError):
        logging.warning('Ignoring unreadable results file: %s', file_path)
        return {}
def store_results(file_path, data):
    """Pickle ``data`` to ``file_path`` for comparison on the next run.

    Args:
        file_path: Destination path; an existing file is overwritten.
        data: The object to pickle.
    """
    # The context manager guarantees the file is flushed and closed even if
    # pickling raises part-way through.
    with open(file_path, 'wb') as output:
        pickle.dump(data, output)
def main(urls):
    """Check every URL in ``urls`` and persist the results.

    Previous results are loaded from ``data.pkl``, each site is checked
    (only when the Internet appears reachable), and the updated results are
    written back to the same file. Warnings and errors are logged to
    ``checksites.log``.

    Args:
        urls: Iterable of URLs to check.
    """
    # Setup logging to store time
    logging.basicConfig(level=logging.WARNING, filename='checksites.log',
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    # Load previous data
    pickle_file = 'data.pkl'
    pickledata = load_old_results(pickle_file)

    # Check sites only if Internet is_available
    if is_internet_reachable():
        status_checker = compare_site_status(pickledata)
        # Explicit loop: the checker is called for its side effects only,
        # and map() would not run it at all where map() is lazy.
        for url in urls:
            status_checker(url)
    else:
        logging.error('Either the world ended or we are not connected to the net.')

    # Store results in pickle file
    store_results(pickle_file, pickledata)
if __name__ == '__main__':
    # sys.argv[0] is the script name; everything after it is a URL to check.
    cli_urls = sys.argv[1:]
    main(cli_urls)