Scraping YouTube metadata using the Google API and Pafy
0 votes
/ June 7, 2019

I am trying to scrape the metadata of the YouTube videos available on a channel, such as videoid, v.title, v.length, v.keywords, v.thumb, v.bigthumb, v.bigthumbhd, v.description, v.author, v.published, v.viewcount, v.duration, v.likes, v.dislikes, v.rating, v.category, commentid, comments, commentlike, v.username, videostreams, video_resolution, video_extension, video_filesize, video_url, best_video.resolution, best_video.extension, best_video.url, audio_bitrate, audio_extension, audio_filesize, best_audio.bitrate, best_audio.extension, al_mediatype, al_extension, al_quality, repliescount, tags, and commentcount. I have been using the Google API and pafy to scrape this metadata for a list of videos on a channel.

My code snippet:

from apiclient.discovery import build
import logging
logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)
from apiclient.errors import HttpError
from oauth2client.tools import argparser
import pafy
import csv
import urllib.request
import urllib.error
import pandas as pd
import sys
from importlib import reload
reload(sys)
#sys.setdefaultencoding("utf-8")

csv_list = []
filename = '/Users/Documents/sanmashatesting.csv'

df = pd.read_fwf('/Users/Documents/Masha.txt',names=["VideoID"])

DEVELOPER_KEY = "XXXXXXXX"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
pafy.set_api_key("XXXXXXXXX")

youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,developerKey=DEVELOPER_KEY)

with open(filename, 'a') as f:
    writer = csv.writer(f)
    writer.writerow(("vID","title","length","keywords","thumb","bigthumb","bigthumbhd","description","author",
                     "published","viewcount", "duration", "likes", "dislikes","rating","category","commentid","comments","commentlike",
                     "username","video_stream","video_resolution","video_extension","video_filesize","video_url",
                     "bestvideo_resolution","bestvideo_extension","bestvideo_url","audio_bitrate","audio_extension",
                     "audio_filesize","bestaudio_bitrate","bestaudio_extension","al_mediatype","al_extension",
                     "al_quality","repliescount","tags","commentcount"))


    for ids in df["VideoID"]:
        v = pafy.new(ids)
        videoid = v.videoid

        videostreams = v.streams
        best_video = v.getbest()

        audiostreams = v.audiostreams
        best_audio = v.getbestaudio()

        allstreams = v.allstreams

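        # Note: each loop below overwrites its variables on every pass, so only
        # the last stream's attributes end up in the CSV row.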
        for video in videostreams:

            video_resolution = video.resolution
            video_extension = video.extension
            video_filesize = video.get_filesize()
            video_url = video.url

        for audio in audiostreams:

            audio_bitrate = audio.bitrate
            audio_extension = audio.extension
            audio_filesize = audio.get_filesize()


        for al in allstreams:

            al_mediatype = al.mediatype
            al_extension = al.extension
            al_quality = al.quality

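        # Fetch the first page of comment threads and the video's basic snippet.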
        results = youtube.commentThreads().list(
                    part="id,snippet",
                    maxResults=100,
                    order='time',
                    videoId=videoid,
                    textFormat="plainText"
                ).execute()

        result = youtube.videos().list(
                    id = videoid,
                    part = "id,snippet",
                ).execute()

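        # parent_id is overwritten on each pass and ends up as the id of the
        # last top-level comment on this page; replies are fetched for it below.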
        for item in results["items"]:    
            parent_id = item["snippet"]["topLevelComment"]["id"]

        res = youtube.comments().list(
                        part="snippet",
                        parentId=parent_id,
                        textFormat="plainText"
                        ).execute()

        result_commentcount = youtube.videos().list(
                        id = videoid,
                        part = "statistics",
                        ).execute()

        totalResults = 0
        totalResults = int(results["pageInfo"]["totalResults"])
        count = 0
        nextPageToken = ''
        comments = []
        replies = []
        further = True
        first = True
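        # Page through the comment threads; the first pass reuses the results
        # fetched above. Note that pageToken is commented out in the refetch
        # below, so later passes request the first page again.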
        while further:
            halt = False
            if first == False:
                print(".")

                try:
                        results = youtube.commentThreads().list(
                        part="id,snippet",
                        maxResults=100,
                        order='time',
                        videoId=videoid,
                        #textFormat="plainText",
                        #pageToken=nextPageToken,
                        ).execute()


                        result = youtube.videos().list(
                        id = videoid,
                        part = "id,snippet",
                        ).execute()

                        result_commentcount = youtube.videos().list(
                        id = videoid,
                        part = "statistics",
                        ).execute()

                        res = youtube.comments().list(
                        part="snippet",
                        parentId=parent_id,
                        textFormat="plainText"
                        ).execute()

                        totalResults = int(results["pageInfo"]["totalResults"])
                except HttpError as e:
                    # the Google API client raises its own HttpError (imported above),
                    # not urllib.error.HTTPError
                    print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
                    halt = True
            if halt == False:
                count += totalResults

                commentcount = ''
                repliescount = ''
                for item in result_commentcount["items"]:
                    commentcount = item["statistics"]["commentCount"]

                for item in result["items"]:
                    if "tags" in item["snippet"]:
                        tags = item["snippet"]["tags"]
                    else:
                        tags = "no tags"
                    #print(tags)


                for item in results["items"]:
                    if commentcount in ("", "0"):  # the API returns commentCount as a string
                        commentid = "comments are disabled"   
                        commentlike = "comments are disabled"
                        comments  = "comments are disabled"

                    else:    
                        comment = item["snippet"]["topLevelComment"]
                        #print(comment)
                        author = comment["snippet"]["authorDisplayName"]
                        text = comment["snippet"]["textDisplay"]
                        commentid = item["snippet"]["topLevelComment"]["id"]
                        commentlike = item["snippet"]["topLevelComment"]["snippet"]["likeCount"]
                        comments.append([author,text])                   
                    #commentdislike = item["snippet"]["topLevelComment"]["snippet"]["dislikeCount"]
                    repliescount = item["snippet"]["totalReplyCount"]


                for item in res["items"]:
                    if "repliescount" == 0:
                        replies  = "replies are disabled"
                    else:
                        author = item["snippet"]["authorDisplayName"]
                        text = item["snippet"]["textDisplay"]
                        replies.append([author, text])



                if totalResults < 100:
                    further = False
                    first = False
                else:
                    further = True
                    first = False
                    try:
                        nextPageToken = results["nextPageToken"]
                    except KeyError as e:
                        print("A KeyError occurred: %s" % (e))
                        further = False

# Adding the full data to CSV

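        # csv_list only accumulates the header names and is never written out;
        # the writer.writerow call below emits the actual data row.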
        csv_list.append(["vID","title","length","keywords","thumb","bigthumb","bigthumbhd","description","author",
                         "published","viewcount", "duration", "likes", "dislikes","rating","category","commentid","comments","commentlike",
                         "username","video_stream","video_resolution","video_extension","video_filesize","video_url",
                         "bestvideo_resolution","bestvideo_extension","bestvideo_url","audio_bitrate",
                         "audio_extension","audio_filesize","bestaudio_bitrate","bestaudio_extension",
                         "al_mediatype","al_extension","al_quality","repliescount","tags","commentcount","replies"])

        writer.writerow([videoid,v.title,v.length,v.keywords,v.thumb,v.bigthumb,v.bigthumbhd,v.description,v.author,
                         v.published,v.viewcount, v.duration, v.likes, v.dislikes,v.rating,v.category,commentid,comments,commentlike,
                         v.username,videostreams,video_resolution,video_extension,video_filesize,video_url,
                         best_video.resolution,best_video.extension,best_video.url,audio_bitrate,audio_extension,
                         audio_filesize,best_audio.bitrate,best_audio.extension,al_mediatype,al_extension,al_quality,repliescount,tags,commentcount,replies])

After that I get the data for 30 videos; the script keeps running without any indication of progress, and after a long time it suddenly stops and throws the error mentioned below.

HttpError: <HttpError 400 when requesting https://www.googleapis.com/youtube/v3/commentThreads?part=id%2Csnippet&maxResults=100&order=relevance&videoId=GxWLe7mONMg&textFormat=plainText&pageToken=QURTSl9pMG1VTjJOd25VUWs5b2FVazNPUGJIMEMwaDJpSkdyX3g1ZC1xZEFyLVVQWEY5b3g0VmlOdDk1U1lTbmpsQS1pa3R0Y1VzaXhCS0ZmTmEtdmpvVXlnT1M4RFN0RTV5TFNReS1Xc2Y5TEJZNGxKdl82NWtkWWpJSGhwWXY%3D&key=AIzaSyDoT9ySV0isd5WraiP9HMDI5ZTuQRiOuCg&alt=json returned "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the requests input is invalid. Check the structure of the <code>commentThread</code> resource in the request body to ensure that it is valid.">
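
A minimal pagination sketch, assuming the same youtube client as above (fetch_all_comment_threads is just an illustrative name), that feeds nextPageToken back into each request, stops when the API stops returning one, and catches the client library's HttpError (the exception type in the traceback above):

from apiclient.errors import HttpError

def fetch_all_comment_threads(youtube, video_id):
    # Collect every top-level comment thread for video_id, one page at a time.
    threads = []
    page_token = None
    while True:
        params = dict(part="id,snippet", maxResults=100, order="time",
                      videoId=video_id, textFormat="plainText")
        if page_token:
            params["pageToken"] = page_token
        try:
            response = youtube.commentThreads().list(**params).execute()
        except HttpError as e:
            # Disabled comments, quota limits, or a stale pageToken all
            # surface here as the client library's HttpError.
            print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
            break
        threads.extend(response.get("items", []))
        page_token = response.get("nextPageToken")
        if not page_token:  # no token means this was the last page
            break
    return threads

Checking for nextPageToken, rather than comparing totalResults to 100, avoids re-requesting the same first page indefinitely.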

I tried to research this error and found the link below:

Unable to create replies to some existing YouTube comments

I am not sure where I went wrong. Please suggest how to solve this problem.

...