"""Search YouTube via the ``youtubei/v1/search`` endpoint and save results as JSON."""

import json

import requests


class YouTubeSearcher:
    """Query the ``youtubei/v1/search`` endpoint and extract video metadata.

    The response is treated as an arbitrarily nested JSON document: video
    entries are found by walking it for ``videoRenderer`` objects, and the
    next-page token by walking it for ``continuationCommand`` objects.
    """

    def __init__(self):
        self.url = "https://www.youtube.com/youtubei/v1/search"
        # Client context sent with every request.
        self.context = {
            "client": {
                "clientName": "WEB",
                "clientVersion": "2.20250620.01.00",
            }
        }

    def search(self, query, max_videos=20):
        """Search YouTube and return video data with pagination support.

        Pages are requested until ``max_videos`` unique videos have been
        collected, no continuation token is found, a page yields no new
        videos, or a request fails.

        Args:
            query: Search string sent to the endpoint.
            max_videos: Upper bound on the number of videos returned.

        Returns:
            A list of at most ``max_videos`` dicts as built by
            ``_parse_video``. Empty when nothing was found, the first
            request failed, or ``max_videos`` is not positive.
        """
        if max_videos <= 0:
            # Nothing was asked for; skip the network and the "not found" notice.
            return []

        videos = []
        seen_ids = set()
        continuation = None

        while len(videos) < max_videos:
            payload = {"context": self.context, "query": query}
            if continuation:
                payload["continuation"] = continuation

            # Only the network round-trip is guarded: the parsing helpers
            # below tolerate malformed data on their own.
            try:
                response = requests.post(self.url, json=payload, timeout=10)
                # Without this an HTTP error body would be parsed as results.
                response.raise_for_status()
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                # ValueError covers invalid JSON on older requests versions.
                print(f"Request failed: {e}")
                break

            # Pages may repeat entries; keep only ids not seen before.
            unique_videos = []
            for video in self._get_videos(data):
                if video["id"] not in seen_ids:
                    seen_ids.add(video["id"])
                    unique_videos.append(video)
            videos.extend(unique_videos)

            continuation = self._get_continuation(data)
            # A page with no new videos means paging has stopped progressing.
            if not continuation or not unique_videos:
                break

        if not videos:
            print("No videos found for this query")
        return videos[:max_videos]

    def _get_videos(self, data):
        """Extract all videos from API response.

        Args:
            data: Decoded JSON response (nested dicts and lists).

        Returns:
            A list of video dicts in the order they are encountered.
        """
        videos = []
        self._find_videos(data, videos)
        return videos

    def _find_videos(self, obj, videos):
        """Recursively collect parsed ``videoRenderer`` objects into ``videos``.

        Args:
            obj: Any node of the decoded response.
            videos: List that parsed videos are appended to (mutated in place).
        """
        if isinstance(obj, dict):
            if "videoRenderer" in obj:
                video = self._parse_video(obj["videoRenderer"])
                if video:
                    videos.append(video)
            else:
                for value in obj.values():
                    self._find_videos(value, videos)
        elif isinstance(obj, list):
            for item in obj:
                self._find_videos(item, videos)

    @staticmethod
    def _text(obj):
        """Return the text of a text object, or ``""`` when there is none.

        A text object carries either a ``simpleText`` string or a ``runs``
        list of ``{"text": ...}`` fragments. All fragments are concatenated
        so that text split over several runs is not truncated.
        """
        if not isinstance(obj, dict):
            return ""
        if "simpleText" in obj:
            return obj["simpleText"]
        runs = obj.get("runs")
        if runs:
            return "".join(run.get("text", "") for run in runs)
        return ""

    def _parse_video(self, r):
        """Build a flat video dict from a ``videoRenderer`` object.

        Args:
            r: The ``videoRenderer`` dict taken from the response.

        Returns:
            A dict with keys ``id``, ``title``, ``url``, ``channel``,
            ``views``, ``duration``, ``published``, ``thumbnail`` and
            ``description``; or ``None`` when the renderer has no
            ``videoId`` or is not shaped as expected.
        """
        try:
            video_id = r.get("videoId", "")
            if not video_id:
                # Without an id the entry cannot be linked or de-duplicated.
                return None

            title = self._text(r.get("title"))
            channel = self._text(r.get("longBylineText"))
            views = self._text(r.get("viewCountText"))
            duration = self._text(r.get("lengthText"))
            published = self._text(r.get("publishedTimeText"))

            # The last entry of the thumbnails list is the one that is kept.
            thumbnails = (r.get("thumbnail") or {}).get("thumbnails") or []
            thumbnail = thumbnails[-1].get("url", "") if thumbnails else ""

            description = ""
            snippets = r.get("detailedMetadataSnippets") or []
            if snippets:
                snippet_runs = (snippets[0].get("snippetText") or {}).get("runs") or []
                # Cap the description at 200 characters.
                description = "".join(
                    run.get("text", "") for run in snippet_runs
                )[:200]

            return {
                "id": video_id,
                "title": title or "No title",
                "url": f"https://www.youtube.com/watch?v={video_id}",
                "channel": channel or "Unknown",
                "views": views or "No views",
                "duration": duration or "Unknown",
                "published": published or "Unknown",
                "thumbnail": thumbnail,
                "description": description,
            }
        except (AttributeError, IndexError, KeyError, TypeError) as e:
            # Raised when part of the renderer is not the expected dict/list.
            print(f"Failed to parse video: {e}")
            return None

    def _get_continuation(self, obj):
        """Find the continuation token for loading the next page of results.

        Walks ``obj`` depth-first and returns the first non-empty token
        found under a ``continuationCommand`` dict.

        Args:
            obj: Any node of the decoded response.

        Returns:
            The token, or ``None`` when no usable token is present.
        """
        if isinstance(obj, dict):
            command = obj.get("continuationCommand")
            if isinstance(command, dict) and "token" in command:
                return command["token"]
            children = obj.values()
        elif isinstance(obj, list):
            children = obj
        else:
            return None

        for child in children:
            token = self._get_continuation(child)
            if token:
                return token
        return None

    def save(self, videos, filename="youtube_search.json"):
        """Save video results to a JSON file with proper Unicode support.

        Args:
            videos: JSON-serialisable list of video dicts.
            filename: Destination path; an existing file is overwritten.
        """
        with open(filename, "w", encoding="utf-8") as f:
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            json.dump(videos, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(videos)} videos")


if __name__ == "__main__":
    searcher = YouTubeSearcher()
    videos = searcher.search("what is mcp", 10)
    searcher.save(videos)