Подскажите, пожалуйста, как собрать данные (выполнить скрапинг) с нескольких страниц.
Приведённый ниже код list_1 собирает названия компаний. С помощью кода list_2
я хотел бы получить адрес электронной почты каждой компании
и записать его рядом с её названием.
Кроме того, я не понимаю, как подставлять в URL из list_2
последний сегмент вида «r + 9 цифр»
(например, r294900083 в https://job.rikunabi.com/2019/company/r294900083/),
чтобы обойти страницы всех компаний.
list_1
# Downloads one search-result page and appends the company names found on it.
#
# Needs the open-uri library loaded (the original Kernel#open call on a URL
# depended on it as well) and Nokogiri.
#
# @param uri [String] URL of the page to download
# @param companies [Array<String>] accumulator; names are appended in place
# @return [Array<String>] the same +companies+ array that was passed in
def get_data(uri, companies)
  # URI.open replaces Kernel#open, which no longer opens URLs on Ruby 3.0+.
  # The block form closes the response stream as soon as the body is read.
  html = URI.open(uri, &:read)
  documents = Nokogiri::HTML(html)
  title_links = documents.xpath("//a[@class='ts-h-search-cassetteTitleMain js-h-search-cassetteTitleMain']")
  title_links.each { |link| companies << link.text.strip }
  companies
end
# Asks for the number of result pages, collects company names from every page
# through get_data, and appends them to a dated CSV file in the current
# working directory.
#
# Page 1 is fetched from the bare search URL; pages 2..N add the paging
# query string with pn=<page number>.
def main
  first_page_uri = "https://job.rikunabi.com/2019/s/__13_0_______/"
  puts "What is the maximum page? "
  page = gets.to_i
  companies = []
  get_data(first_page_uri, companies)
  (2..page).each do |idx|
    get_data("https://job.rikunabi.com/2019/s/__13_0_______/?moduleCd=2&isc=ps054&pn=#{idx}", companies)
  end

  time = Time.new.strftime("%Y-%m-%d")
  path = "rikunabi_tokyo_2019-#{time}.csv"
  # The file is opened in append mode, so the header row is written only when
  # the file is missing or empty; otherwise each re-run on the same day would
  # insert another header line in the middle of the data.
  needs_header = !File.size?(path)
  CSV.open(path, "a", headers: ["company_name"], write_headers: needs_header) do |csv|
    companies.each { |name| csv << [name] }
  end
end
# Run the scraper only when this file is executed directly (not when loaded).
if $PROGRAM_NAME == __FILE__
  puts "Process Start"
  main
  puts "Process Finished"
end
list_2
# Downloads one company page and appends its title and one text field to the
# accumulators carried in +data+.
#
# Needs the open-uri library loaded (the original Kernel#open call on a URL
# depended on it as well), Nokogiri, and String#toutf8 (kconv).
#
# @param uri [String] URL of the company page to download
# @param data [Array(Array, Array)] pair of accumulators [companies, parameters];
#   both inner arrays are appended to in place
# @return [Array(Array, Array)] a new two-element array holding the same
#   +companies+ and +parameters+ arrays
def get_data(uri, data)
  companies, parameters = data
  # URI.open replaces Kernel#open, which no longer opens URLs on Ruby 3.0+.
  # The block form closes the response stream as soon as the body is read.
  html = URI.open(uri, &:read)
  documents = Nokogiri::HTML(html.toutf8, nil, 'utf-8')
  companies << documents.xpath("//h1[@class='ts-h-company-mainTitle']").text
  # Only the second matching div is used.
  # NOTE(review): presumably the block holding the contact details - confirm
  # against the page markup.
  sentence = documents.xpath("//div[@class='ts-h-company-sentence']")[1]
  # A page with fewer than two such divs used to raise on nil *after* the
  # company name had been appended, leaving the two arrays misaligned.
  # Appending nil instead keeps one entry per company in both arrays.
  parameters << sentence&.text&.strip&.tr("\r", " ")
  [companies, parameters]
end
# Asks for a page count, calls get_data that many times (at least once), and
# appends the collected (company name, second field) pairs to a dated CSV
# file in the current working directory under the headers
# "company_name" and "Email".
def main
  uri = "https://job.rikunabi.com/2019/company/r294900083/"
  puts "What is the maximum page? "
  page = gets.to_i
  companies = []
  parameters = []
  data = get_data(uri, [companies, parameters])
  # NOTE(review): every iteration requests the same hard-coded company URL, so
  # the same record is collected repeatedly. To scrape different companies the
  # "r" + 9 digit segment has to vary per request; behaviour is kept as is.
  (2..page).each { data = get_data(uri, data) }

  # Only indices present in both arrays are written.
  row_count = [companies.size, parameters.size].min
  rows = companies.first(row_count).zip(parameters)

  time = Time.new.strftime("%Y-%m-%d")
  path = "rikunabi_tokyo_2019-#{time}.csv"
  # The file is opened in append mode, so the header row is written only when
  # the file is missing or empty; otherwise each re-run on the same day would
  # insert another header line in the middle of the data.
  needs_header = !File.size?(path)
  CSV.open(path, "a", headers: ["company_name", "Email"], write_headers: needs_header) do |csv|
    rows.each { |row| csv << row }
  end
end
# Run the scraper only when this file is executed directly (not when loaded).
if $PROGRAM_NAME == __FILE__
  puts "Process Start"
  main
  puts "Process Finished"
end