# /usr/lib/clinica/plugins/AgenziaDelFarmaco.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#

import urllib2, urllib, cookielib, re, sys, sqlite3, gettext, threading, os
from gi.repository import Clinica, GObject, Gtk, Gdk, Peas, Gee

# Conventional short alias for gettext.gettext (translated message lookup).
_ = gettext.gettext

class MedicalSuppliesSearchEngine (GObject.Object, Clinica.UserInterfaceActivatable, Clinica.MedicineSearchEngine):
    """Clinica plugin exposing the Agenzia del Farmaco lookup as a
    medicine search engine.

    The engine registers itself with the resource manager when the plugin
    is activated and unregisters itself when it is deactivated."""

    user_interface = GObject.property(type=Clinica.UserInterface)
    resource_manager = GObject.property(type=Clinica.ResourceManager)

    # The type name is kept exactly as it has always been registered.
    __gtype_name__ = "AgenziaDelFarmcoSearchEngine"

    def do_search_medicine (self, key, treeview):
        """Search for medicines matching key and push every medicine
        found into treeview.

        The search stops as soon as do_abort_search () has been called.
        Results whose detail page could not be parsed (ottieni_farmaco
        returned None) are skipped instead of being pushed."""
        af = AgenziaDelFarmacoSearchEngine (key)
        self.stop_searching = False
        for id_code in af:
            # Do not start another detail request once an abort was asked.
            if self.stop_searching:
                return
            medicine = af.ottieni_farmaco (id_code)
            # The request above may be slow: check again before pushing.
            if self.stop_searching:
                return
            if medicine is None:
                continue
            treeview.push_medicine (medicine)

    def do_abort_search (self):
        """Ask the running search, if any, to stop at its next check."""
        self.stop_searching = True

    def do_get_name (self):
        """Return the name of this search engine."""
        return "Agenzia del Farmaco"

    def do_activate(self):
        """Register this object as a medicine search engine."""
        self.resource_manager.register_medicine_search_engine (self)

    def do_deactivate(self):
        """Unregister this object from the medicine search engines."""
        self.resource_manager.unregister_medicine_search_engine (self)

    def do_update_state(self):
        """Nothing to update for this plugin."""
        pass

class AgenziaDelFarmacoSearchEngine ():

    def __init__ (self, nome_farmaco = None):
        self.nome_farmaco = nome_farmaco
        self.base_url = "http://farmaco.agenziafarmaco.it/index.php?SEARCH=yes&S_DESCR_SPECIALITA=%s&S_SOSTANZA=&S_DITTA=%s&SSN=&DSNOTA_AIFA=&GRUPPO_RICETTA="
        self.post_base_url = "http://farmaco.agenziafarmaco.it/index.php?SCHEDA_CONF=yes"
        self.cookiejar = cookielib.CookieJar ()
        self.parsing = False
        self.pages_available = False
        self.found = []
        self.page = 0

    def __iter__ (self):
        return self

    def parse_next (self):
        element = self.found.pop ()
        if len(self.found) == 0:
            self.parsing = False
        return element

    def load_next_page (self):
        if self.page >= self.pagine:
            raise StopIteration
        self.page += 1
        url = (self.get_url ()) + ("&PAGE=%d" % self.page)
        try:
            request = urllib2.Request (url)
            response = urllib2.urlopen (request)
            self.content = response.read ()
            self.cookiejar.extract_cookies (response, request)
        except Exception, e:
            print e
            raise StopIteration

        self.found = re.findall ("name=\"AIC\" value=\"(\d+)\"", self.content)
        self.parsing = True
        if len(self.found) == 0:
            self.page_available = False
        if len(self.found) <= 0:
            self.parsing = False

    def get_url (self):
        return self.base_url % (self.nome_farmaco.upper (), "")
    
    def next (self):
        """Search for medicals with the name specified
        and return a list of results found"""
        if self.parsing:
            return self.parse_next ()

        if self.pages_available:
            self.load_next_page ()
            return self.parse_next  ()
        
        url = self.get_url ()

        try:
            request = urllib2.Request (url)
            response = urllib2.urlopen (request)
            self.content = response.read ()
            self.cookiejar.extract_cookies (response, request)
        except Exception, e:
            print e
            raise StopIteration
        
        try:
            prodotti = int(re.findall (r"Numero totale prodotti trovati: <b>(\d+)</b>", self.content)[0])
            self.pagine = int(re.findall (r"Salta a pagina \(1-(\d+)\)", self.content)[0])
        except IndexError:
            raise StopIteration

        self.pages_available = True

        self.load_next_page ()
        while not self.parsing:
            if self.pages_available:
                self.load_next_page ()
            else:
                raise StopIteration
        return self.parse_next ()

    def ottieni_farmaco (self, id):
        url = self.post_base_url
        data = { 
            "SCHEDA_CONF": "yes",
            "AIC": str(id),
            }
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        headers = { 
            'User-Agent': user_agent,
            'Host': "farmaco.agenziafarmaco.it",
            'Referer': "http://farmaco.agenziafarmaco.it/index.php?SEARCH=yes&S_DESCR_SPECIALITA=TACHIPIRINA&S_SOSTANZA=&S_DITTA=&SSN=&DSNOTA_AIFA=&GRUPPO_RICETTA=",
            }
        
        enc = urllib.urlencode (data)
        request = urllib2.Request (url, enc, headers)
        self.cookiejar.add_cookie_header (request)
        content = urllib2.urlopen (request).read ()

        results = []
        for item in re.findall (r"<td class=input><b>(.*)</b>", content):
            results.append (item)

        results = map (lambda x : unicode(x.strip ().decode ("utf-8")), results)
        if (len(results) != 14):
            return None
        m = Clinica.Medicine ()
        m.set_property ("description",  results[1])
        m.set_property ("name", results[2])
        m.set_property ("active-ingredient", results[3])
        m.set_property ("id", results[5])
        m.set_property ("storage-reccomendations", results[6])
        if "." in results[11]:
            m.set_property ("price", results[11] + " €".decode("utf-8"))
        else:
            m.set_property ("price", results[11])
            
        # Create other notes property
        other_notes_elements = [
            u"<b>Forma farmaceutica:</b> %s" % results[7],
            u"<b>Regime di fornitura:</b> %s" % results[8],
            u"<b>Classe di rimborsabilità:</b> %s" % results[9],
            u"<b>Ditta:</b> %s" % results[13]
        ]
        
        # Add AIFA not if not empty
        if (results[10].strip() != ""):
            other_notes_elements.append (u"<b>Nota AIFA:</b> %s" % results[10])
            
        # Set the Other notes property
        m.set_property ("other-notes",
                        u"\n".join (other_notes_elements))
        return m
# clinica-plugins 0.2.1~dfsg-1 / usr / lib / clinica / plugins / AgenziaDelFarmaco.py