#!/usr/bin/ruby -w
# frozen_string_literal: true

#=======================================================================
# leeurl.rb - extract the URLs found in an HTML document
#-----------------------------------------------------------------------
# Usage: leeurl <URL> [t]
#
# Downloads <URL>, lists every URL found in its text on standard output
# and writes the same list, as clickable links, to an HTML report file.
# When a second argument is given (the usage text suggests "t"), URLs of
# every scheme are listed; otherwise only the schemes in URL_LOCATIONS.
#-----------------------------------------------------------------------
# FJA - neocipres@gmail.com                            September 2008
#=======================================================================

require 'open-uri'
require 'cgi/escape'

# Schemes passed to URI.extract when the "all URLs" flag is absent.
URL_LOCATIONS = %w{http https ftp img mailto}.freeze

# Path of the generated HTML report.
OUTPUT_PATH = '/tmp/urls.html'

# Builds the help text shown when the script is called without a URL.
#
# @return [String] the usage banner, ready to be printed
def usage_text
  rule = "\n\t#{'=' * 65}"
  [
    rule,
    "\n\t Uso: leeurl <URL> [t]\n",
    "\n\t\t leeurl es un enlace simbólico a cb18.rb",
    "\n\t\t ejemplo de URL: http://www.gnu.org",
    rule,
    "\n\n"
  ].join
end

# Extracts the URLs contained in a piece of text.
#
# @param texto [String] text to scan
# @param todos [Object, nil] when truthy, URLs of any scheme are returned;
#   otherwise only those whose scheme is listed in URL_LOCATIONS
# @return [Array<String>] the URLs in the order URI.extract reports them
def extract_urls(texto, todos)
  todos ? URI.extract(texto) : URI.extract(texto, URL_LOCATIONS)
end

# Prints the URL list to standard output between two rules of "=".
#
# @param urls [Array<String>] URLs to list
# @return [void]
def print_urls(urls)
  rule = '=' * 79
  print "\n\t", rule, "\n"
  urls.each { |u| puts "\t #{u} " }
  print "\t", rule, "\n\n"
end

# Renders the HTML report: a heading describing the source page followed
# by one link per URL.
#
# Every interpolated value is HTML-escaped and each href is quoted, so a
# URL containing quotes, angle brackets, ampersands or spaces can neither
# break the attribute nor inject markup into the report.
#
# @param host [String, nil] host of the source URI
# @param port [Integer, nil] port of the source URI
# @param cad_url [String] the URL exactly as given on the command line
# @param urls [Array<String>] URLs to list as links
# @return [String] the complete HTML document
def build_html(host, port, cad_url, urls)
  esc = ->(value) { CGI.escapeHTML(value.to_s) }

  enlaces = urls.map do |u|
    %( <p> <a target="_blank" href="#{esc.call(u)}"> #{esc.call(u)} </a> </p>\n)
  end

  titulo = "# Host -> #{esc.call(host)} Puerto -> #{esc.call(port)} " \
           "URL -> #{esc.call(cad_url)} #"

  [
    %(<html lang="es-es">\n),
    %(<head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>\n),
    "<body>\n",
    %(<hr><h2 align="center">#{titulo}</h2><hr><br>\n),
    *enlaces,
    "</body>\n",
    "</html>\n"
  ].join
end

# Script entry point: downloads the page, lists its URLs and writes the
# HTML report. Without a URL argument it only prints the usage text.
# Any StandardError raised while parsing, downloading or writing is
# reported on standard output instead of aborting with a backtrace.
#
# @param argv [Array<String>] command-line arguments: <URL> [t]
# @return [void]
def main(argv)
  cad_url, todos = argv

  unless cad_url
    print usage_text
    return
  end

  uri = URI.parse(cad_url)
  urls = extract_urls(uri.read, todos)

  print_urls(urls)
  File.write(OUTPUT_PATH, build_html(uri.host, uri.port, cad_url, urls))
  print "\n\t Generado el archivo: #{OUTPUT_PATH} \n\n"
rescue StandardError => e
  print "\n\t Error-> #{e} \n\n"
end

main(ARGV) if __FILE__ == $PROGRAM_NAME