Создаётся много CSV-файлов (рабочих книг), а мне нужен один CSV-файл (одна рабочая книга с одним листом). (Веб-скрейпинг) - PullRequest
0 голосов
/ 07 января 2019

Мой код создаёт много CSV-файлов, а мне нужен один CSV-файл. Данные по пяти партиям за все годы должны попасть в один CSV-файл. По сути, я занимаюсь веб-скрейпингом и хочу построить временной ряд результатов опросов для этих партий (CDU, SPD, FDP, GRUNEN, LEFT).

  import scrapy
  import re
  import csv


  # Spider that starts from the page listed in start_urls; its callbacks
  # (parse, parseTable, parseLink) follow below.
  # NOTE(review): the class body and the methods below lost their indentation
  # in this paste; they must be indented under the class for the file to run.
  class VoteSpider(scrapy.Spider):
  # Identifier string of this spider.
  name = 'VoteSpider'
  # Page(s) fetched first; the response is handled by parse().
  start_urls = ['https://www.wahlrecht.de/umfragen/forsa.htm'] 

def __init__(self, *args, **kwargs):
    """Initialise the spider and the per-party result stores.

    Positional and keyword arguments are forwarded to ``scrapy.Spider`` so
    that the base-class initialisation runs and arguments supplied when the
    spider is instantiated no longer raise ``TypeError``.
    """
    super(VoteSpider, self).__init__(*args, **kwargs)
    # Each dict maps a period label ('Current' or a year string such as
    # '2013') to the values collected for that party.
    self.CDU = {}
    self.SPD = {}
    self.FDP = {}
    self.Green = {}
    self.left = {}


def parse(self, response):
    """Handle the start page: schedule the per-year pages, then parse its table.

    Yields one ``scrapy.Request`` for every href on the page that looks like
    ``forsa/<digits>.htm``.  Each request is handled by ``parseLink`` and
    carries the raw href as ``meta['name']``.  Once all requests have been
    yielded, the ``wilko`` table body of the current page is passed to
    ``parseTable``.
    """
    # Match the literal "forsa" prefix and a literal dot.  The previous
    # pattern used a character class ("[forsa]+") and an unescaped ".", so it
    # also accepted unrelated hrefs.
    year_link_pattern = re.compile(r"forsa/+[0-9]+\.htm")
    tableBody = response.xpath('//*[@class="wilko"]//tbody')

    for link in response.xpath('*//a/@href').extract():
        if not year_link_pattern.search(link):
            continue
        # Resolve the href against the page URL instead of gluing it onto a
        # hard-coded base, so absolute hrefs are handled as well.
        yield scrapy.Request(
            url=response.urljoin(link),
            callback=self.parseLink,
            meta={'name': link},
        )

    self.parseTable(tableBody)

def parseTable(self, tableBody):
    """Extract the party columns of the current table and persist them.

    For every table row after the first five, the text nodes at positions
    2-6 are taken as the CDU, SPD, Green, FDP and left values.  Each party's
    values are written to ``<party>Current.csv`` (one value per line) and
    stored under the ``'Current'`` key of the matching instance dict.

    Args:
        tableBody: selector (list) for the table body of the page.
    """
    CDU = []
    SPD = []
    FDP = []
    Green = []
    left = []

    # NOTE(review): '//tr' is an absolute XPath, so it presumably selects
    # every <tr> in the document rather than only those below tableBody.
    # It is kept as-is because the number of skipped leading rows below was
    # chosen against this selection -- confirm before switching to './/tr'.
    rows = tableBody.xpath('//tr')
    for row in rows[5:]:
        # Evaluate the XPath once per row instead of once per column.
        cells = row.xpath('td//text()').extract()
        if len(cells) < 7:
            # Rows without all expected text nodes cannot be mapped to the
            # five parties; skip them instead of raising IndexError.
            continue
        CDU.append(cells[2])
        SPD.append(cells[3])
        Green.append(cells[4])
        FDP.append(cells[5])
        left.append(cells[6])

    columns = (
        ('CDU', CDU),
        ('SPD', SPD),
        ('left', left),
        ('Green', Green),
        ('FDP', FDP),
    )
    for party, values in columns:
        # newline='' lets the csv module control line endings itself.
        with open(party + 'Current' + '.csv', 'w', newline='') as csvFile:
            writer = csv.writer(csvFile)
            # Every file now receives its own party's data (previously the
            # CDU list was written to all five files), and each value is
            # wrapped in a one-element row so that writerows() does not
            # split the string into one cell per character.
            writer.writerows([value] for value in values)

    # Same shape as before: a list holding the list of values.
    self.CDU['Current'] = [CDU]
    self.SPD['Current'] = [SPD]
    self.Green['Current'] = [Green]
    self.FDP['Current'] = [FDP]
    self.left['Current'] = [left]

def parseLink(self, response):
    """Extract the party columns of one per-year page and persist them.

    The year is the first run of digits in ``response.meta['name']``.  A
    year-specific number of leading table rows is dropped; from each
    remaining row the text nodes at positions 2-6 are taken as the CDU, SPD,
    Green, FDP and left values.  Each party's values are written to
    ``<party><year>.csv`` (one value per line) and stored under the year key
    of the matching instance dict.

    Raises:
        IndexError: if the link name contains no digits.
    """
    CDU = []
    SPD = []
    FDP = []
    Green = []
    left = []

    name = response.meta.get('name')
    yearNumber = re.findall(r'\d+', name)[0]

    # Number of leading rows to drop for each year; years that are not
    # listed drop none.
    leading_rows_to_skip = {
        '1998': 3,
        '1999': 3,
        '2000': 3,
        '2001': 3,
        '2002': 3,
        '2003': 3,
        '2004': 3,
        '2005': 5,
        '2006': 3,
        '2007': 4,
        '2008': 4,
        '2013': 4,
    }
    x = leading_rows_to_skip.get(yearNumber, 0)

    tableBody = response.xpath('//*[@class="wilko"]//tbody')
    # NOTE(review): '//tr' is an absolute XPath, so it presumably selects
    # every <tr> in the document rather than only those below tableBody.
    # It is kept as-is because the skip counts above were chosen against
    # this selection -- confirm before switching to './/tr'.
    rows = tableBody.xpath('//tr')
    for row in rows[x:]:
        # Evaluate the XPath once per row instead of once per column.
        cells = row.xpath('td//text()').extract()
        if len(cells) < 7:
            # Rows without all expected text nodes cannot be mapped to the
            # five parties; skip them instead of raising IndexError.
            continue
        CDU.append(cells[2])
        SPD.append(cells[3])
        Green.append(cells[4])
        FDP.append(cells[5])
        left.append(cells[6])

    columns = (
        ('CDU', CDU),
        ('SPD', SPD),
        ('left', left),
        ('Green', Green),
        ('FDP', FDP),
    )
    for party, values in columns:
        # newline='' lets the csv module control line endings itself.
        with open(party + yearNumber + '.csv', 'w', newline='') as csvFile:
            writer = csv.writer(csvFile)
            # Every file now receives its own party's data (previously the
            # CDU list was written to all five files), and each value is
            # wrapped in a one-element row so that writerows() does not
            # split the string into one cell per character.
            writer.writerows([value] for value in values)

    # Same shape as before: a list holding the list of values.
    self.CDU[yearNumber] = [CDU]
    self.SPD[yearNumber] = [SPD]
    self.Green[yearNumber] = [Green]
    self.FDP[yearNumber] = [FDP]
    self.left[yearNumber] = [left]

Ожидаемый результат: данные всех партий (CDU, SPD, GRUNEN, FDP, LEFT) за все годы в одном CSV-файле.

1 Ответ

0 голосов
/ 07 января 2019

Вместо того чтобы открывать несколько файлов, вы можете дописывать данные в один файл, например так:

...

# Append every party's values to a single file for the year.  Each value is
# wrapped in a one-element row: handing a list of plain strings straight to
# writerows() would split every string into one cell per character.
# newline='' lets the csv module control line endings itself.
with open('ALL'+yearNumber+'.csv', 'a+', newline='') as csvFile:
    writer = csv.writer(csvFile)
    for values in (CDU, SPD, left, Green, FDP):
        writer.writerows([value] for value in values)

...
...