#!/usr/bin/ruby -w

#=======================================================================
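# leeurl.rb - extract the URLs found in an HTML document
#-----------------------------------------------------------------------
# Usage: leeurl <URL> [t]
#   Downloads <URL>, prints the URIs found in its body and writes them as
#   links to /tmp/urls.html. When a second argument is given, URIs of any
#   scheme are listed instead of only the default set of schemes.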
#-----------------------------------------------------------------------
# FJA - neocipres@gmail.com 		Septiembre de 2008
#=======================================================================

require 'open-uri'

require 'erb'

# URI schemes that are reported when no second command-line argument is given.
URL_LOCATIONS = %w{http https ftp img mailto}.freeze

# Path of the HTML report generated by the script.
OUTPUT_PATH = '/tmp/urls.html'

# Escapes a value for safe inclusion in HTML text or in a quoted attribute.
# The extracted URLs come from a remote document, so they are untrusted.
def escape_html(value)
  ERB::Util.html_escape(value)
end

# Prints the command-line help shown when no URL argument is supplied.
def print_usage
  print "\n\t", '=' * 65
  print "\n\t Uso: leeurl <URL> [t]\n"
  print "\n\t\t  leeurl es un enlace simbólico a cb18.rb"
  print "\n\t\t  ejemplo de URL: http://www.gnu.org"
  print "\n\t", '=' * 65, "\n\n"
end

# Parses +cad_url+ and downloads the document it points to.
#
# Returns a two-element array: the parsed URI and the document body.
# Raises ArgumentError when the parsed URI cannot be read through open-uri
# (for example when the scheme is missing), instead of failing later with
# an obscure NoMethodError.
def fetch_document(cad_url)
  uri = URI.parse(cad_url)
  unless uri.respond_to?(:read)
    raise ArgumentError, "esquema de URL no soportado: #{cad_url}"
  end
  [uri, uri.read]
end

# Extracts the URIs contained in +texto+.
#
# When +todos+ is truthy every URI is returned; otherwise only those whose
# scheme is listed in URL_LOCATIONS.
def extract_urls(texto, todos)
  todos ? URI.extract(texto) : URI.extract(texto, URL_LOCATIONS)
end

# Prints the extracted URLs to standard output between two separator rules.
def print_url_list(urls)
  print "\n\t", '=' * 79, "\n"
  urls.each { |u| puts "\t #{u} " }
  print "\t", '=' * 79, "\n\n"
end

# Builds the HTML report: a heading with the host, port and requested URL,
# followed by one link per extracted URL.
#
# Every interpolated value is HTML-escaped and the href attribute is quoted,
# so markup embedded in a URL cannot break or inject into the page.
def build_html(uri, cad_url, urls)
  heading = "# Host -&gt; #{escape_html(uri.host)}&nbsp;&nbsp;&nbsp; " \
            "Puerto -&gt; #{escape_html(uri.port)} &nbsp;&nbsp;&nbsp;" \
            "URL -&gt; #{escape_html(cad_url)} #"

  links = urls.map do |u|
    safe = escape_html(u)
    %( <p> &nbsp;&nbsp;&nbsp; <a target="_blank" href="#{safe}"> #{safe}  </a>  </p>\n)
  end

  [
    %(<html lang="es-es">\n),
    %(<head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>\n),
    "<body>\n",
    %(<hr><h2 align="center">#{heading}</h2><hr><br>\n),
    links.join,
    "</body>\n",
    "</html>\n"
  ].join
end

# Script entry point.
#
# +argv+ holds the URL to inspect and an optional second argument that, when
# present, disables the scheme filter. Any StandardError raised while
# fetching, extracting or writing is reported on standard output.
def main(argv)
  cad_url, todos = argv
  unless cad_url
    print_usage
    return
  end

  uri, texto = fetch_document(cad_url)
  urls = extract_urls(texto, todos)
  print_url_list(urls)

  # File.open rather than Kernel#open: the target is always a local file.
  File.open(OUTPUT_PATH, 'w') { |f| f.write(build_html(uri, cad_url, urls)) }
  print "\n\t Generado el archivo: #{OUTPUT_PATH} \n\n"
rescue StandardError => e
  print "\n\t Error-> #{e} \n\n"
end

main(ARGV)