Script for searching files on rapidshare

This is a Ruby script I wrote that uses filestube.com to search for files on rapidshare. (It doesn’t use the filestube API — it just fetches and parses the HTML.)

#!/usr/bin/ruby

require 'net/http'
require 'uri'
require 'optparse'
require 'rexml/document'

# Performs an HTTP GET request and returns the response body.
#
# url  - absolute http URL as a String; its host, port and path are used
#        (a query string already present in url is not forwarded)
# data - optional Hash of query parameters; values are passed through
#        url_escape, keys are sent verbatim
#
# Returns the body of the response object produced by Net::HTTP#get.
def get(url, data=nil)
  url = URI.parse(url)
  # Pass the port along so URLs such as http://host:8080/ are honoured
  # instead of silently falling back to port 80.
  http = Net::HTTP.new(url.host, url.port)
  # Net::HTTP refuses an empty request path, so "http://host" means "/".
  path = url.path.to_s.empty? ? '/' : url.path
  if data && !data.empty?
    # join handles any number of pairs; an empty hash is skipped above so
    # no dangling "?" is sent.
    query = data.map { |k, v| "#{k}=#{url_escape(v)}" }.join('&')
    path = "#{path}?#{query}"
  end
  res = http.get(path, {'Cookie' => 'usr_timeoffset=0'})
  res.body
end

# Percent-encodes a value for use in a URL query string.
#
# o - the value to encode
#
# For a String: every run of characters other than space, ASCII letters,
# digits, "_", "." and "-" is replaced by %XX escapes (one per byte), and
# spaces become "+". Any non-String value is returned unchanged.
def url_escape(o)
  return o unless o.is_a?(String)

  escaped = o.gsub(/[^ a-zA-Z0-9_.-]+/) do |run|
    # Count bytes, not characters: a multibyte character must produce one
    # %XX escape per byte (e.g. "é" -> %C3%A9).
    '%' + run.unpack('H2' * run.bytesize).join('%').upcase
  end
  escaped.tr(' ', '+')
end

# Runs one filestube.com search request and returns the result links
# found in the returned HTML.
#
# query - search terms, sent as the "q" parameter
# page  - value for the "page" parameter; left out of the request when nil
# sort  - value for the "sort" parameter; left out of the request when nil
# size  - value for the "size" parameter; left out of the request when nil
#
# Returns an Array holding the href of every anchor whose markup matches
# <a href="..." class="resultsLink">.
def filestube_urls(query, page=nil, sort=nil, size=nil)
  # NOTE(review): hosting=1 presumably restricts results to rapidshare —
  # confirm against the site.
  params = { 'q' => query, 'select' => 'All', 'hosting' => 1 }
  # Optional parameters are added in the order size, sort, page.
  params['size'] = size if size
  params['sort'] = sort if sort
  params['page'] = page if page

  link_pattern = /<a href="([^"]*)" class="resultsLink">.*?<\/a>/
  html = get("http://www.filestube.com/search.html", params)
  html.scan(link_pattern).map { |captures| captures[0] }
end

# Yields every link listed in the <pre id="copy_paste_links"> element of
# an HTML page.
#
# page - HTML source to scan
#
# Yields each non-blank line of that element with surrounding whitespace
# (including a trailing "\r" from CRLF line endings) removed. Yields
# nothing and returns nil when the element is not present; otherwise
# returns the Array of links that were yielded.
def extract_urls(page)
  # Non-greedy capture: stop at the first </pre> so that any later <pre>
  # element on the same page is not swallowed into the link list.
  return unless page =~ /<pre id="copy_paste_links"[^>]*>(.*?)<\/pre>/m

  links = $1.split("\n").map { |line| line.strip }.reject { |line| line.empty? }
  links.each { |link| yield link }
end

# Searches filestube and yields every link found on each result's page.
#
# query - search terms, forwarded to filestube_urls
# num   - requested number of result packages, or nil for a single
#         unrestricted search request
# sort  - forwarded to filestube_urls
# size  - forwarded to filestube_urls
#
# Each search result is fetched with get and scanned with extract_urls;
# links are yielded as soon as they are found.
def rapidshare_urls(query, num=nil, sort=nil, size=nil)
  # Fetch one result page and pass every link on it to the caller's block.
  emit_links = lambda { |result_url|
    extract_urls(get(result_url)) { |link| yield link }
  }

  # No limit requested: a single search request, every result is used.
  return filestube_urls(query, nil, sort, size).each(&emit_links) unless num

  page_count = num / 10
  if page_count > 1
    # NOTE(review): paging starts at 2 and the total is not cut down to
    # num — looks like the first result page is skipped; confirm intent.
    (2..page_count + 1).each { |page_no|
      filestube_urls(query, page_no, sort, size).each(&emit_links)
    }
  else
    # Only the first num results of a single search request are used.
    filestube_urls(query, nil, sort, size)[0..num-1].each(&emit_links)
  end
end


# Prints the command-line help text to standard output.
def usage()
  help_lines = [
    "usage:",
    "rapidsearch.rb [-n number] [-o order] [-s size] [-h]",
    "",
    "-n number of url packages",
    "-h this help screen",
    "-s size:",
    "      1 - <20MB",
    "      2 - 20MB - 200MB",
    "      3 - 200MB - 1GB",
    "      4 - >1GB",
    "-o sort by (default relevance):",
    "      pd - popularity",
    "      dd - date",
    "      sd - size"
  ]
  # puts writes each array element on its own line; "" gives the blank line.
  puts help_lines
end

# Command-line entry point: parse the options, validate them, then print
# every link found for the remaining arguments (joined into one query).
params = ARGV.getopts('n:o:s:h')
if params['h']
  usage
  exit
elsif params['o'] && !["pd", "dd", "sd"].include?(params['o'])
  puts "Bad parameter -o #{params['o']}"
  usage
  exit
elsif params['s'] && ![1,2,3,4].include?(params['s'].to_i)
  puts "Bad size #{params['s']}"
  usage
  # Stop here like the other error branches; without this the search
  # would still run with the rejected size value.
  exit
end

query = ARGV.join(' ')
# -n is optional; nil tells rapidshare_urls that no limit was requested.
limit = params['n'] && params['n'].to_i
begin
  rapidshare_urls(query, limit, params['o'], params['s']) {|url|
    puts url
    # Flush after every link so output appears as soon as it is found.
    $stdout.flush
  }
rescue Interrupt, Errno::EINTR
  # An interrupted run exits quietly with status 0.
  exit(0)
end

You can use it like this:

rapidsearch.rb -n 10 Matrix

This will display the URLs for 10 packages containing the word “Matrix”.

The -s option selects the size of packages (1 – 4).

The -o option selects the sort order; it must be one of dd (by date), pd (by popularity) or sd (by size).

You can download this script from here.

You can combine this script with rapidtest.rb to validate the rapidshare links:


rapidsearch.rb -n 5 -s 3 Matrix | rapidtest.rb -f -