Imprimir Página - Búsqueda en la RAE (rae.py)

Código

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author : Leo Gutiérrez Ramirez, leorocko13@hotmail.com
# html2txt by Francois Granger, <fgranger@altern.org>
import sys
import re
import urllib
 
def Usage():
    """Print a short usage message to stdout and exit with status 1.

    The message names the script via ``sys.argv[0]``.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # Parenthesised single-argument print behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print("\nRealiza busquedas en el diccionario de la [Real Academia Española]\n")
    print("\t" + sys.argv[0] + " Palabra\n")
    sys.exit(1)
 
# A search word is mandatory; without one, show the usage text and exit.
if len(sys.argv) <= 1:
    Usage()

# Percent-encode the word so characters such as '&', '#' or spaces cannot
# alter the query string sent to the dictionary server.
url = "http://buscon.rae.es/draeI/SrvltGUIBusUsual?LEMA=" + urllib.quote(sys.argv[1])
f = urllib.urlopen(url)
try:
    texto = f.read()
finally:
    # Release the connection even if reading the response fails.
    f.close()
 
# Convert the HTML markup to plain text so it displays correctly in the console.

# Patterns are compiled once at module level and reused by html2txt().
# Tags whose name starts with "p" or "tr" (case-insensitive) become newlines.
p = re.compile('(<p.*?>)|(<tr.*?>)', re.I)
# Tags whose name starts with "td" (case-insensitive) become tabs.
t = re.compile('<td.*?>', re.I)
# HTML comments.
comm = re.compile('<!--.*?-->', re.M)
# Any tag still left after the substitutions above.
tags = re.compile('<.*?>', re.M)


def html2txt(texto, hint='entity', code='ISO-8859-1'):
    """Return a plain-text rendering of the HTML string *texto*.

    Steps, in order: drop every newline character, turn paragraph/row
    openers into newlines and cell openers into tabs, delete comments and
    all remaining tags, then collapse runs of spaces into a single space.

    Args:
        texto: HTML source to convert.
        hint: accepted for backward compatibility; currently unused.
        code: accepted for backward compatibility; currently unused.

    Returns:
        The converted text. HTML entities (e.g. ``&amp;``) are left as-is;
        no entity decoding is performed.
    """
    # Original line breaks carry no meaning in HTML; removing them first also
    # lets the non-greedy patterns span what used to be several lines.
    texto = texto.replace('\n', '')
    texto = p.sub('\n', texto)
    texto = t.sub('\t', texto)
    texto = comm.sub('', texto)
    texto = tags.sub('', texto)
    # Only runs of spaces are collapsed; the inserted '\n' and '\t' are kept.
    return re.sub(' +', ' ', texto)
# Show the converted page; the parenthesised form prints identically under
# the Python 2 print statement and the Python 3 print function.
print(html2txt(texto))
sys.exit(0)

Modo de uso:

Código:

python rae.py palabra

Foro de elhacker.net

Programación => Scripting => Mensaje iniciado por: leogtz en 1 Abril 2010, 04:48 am