-rw-r--r-- | youtube/search.py | 95 |
1 file changed, 2 insertions, 93 deletions
diff --git a/youtube/search.py b/youtube/search.py
index 5982d9b..9596727 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -5,7 +5,7 @@ from string import Template
 import base64
 from math import ceil
 from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
-import youtube.common as common
+from youtube import common, proto

 with open("yt_search_results_template.html", "r") as file:
     yt_search_results_template = file.read()
@@ -15,26 +15,6 @@ with open("yt_search_template.html", "r") as file:
 page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
 current_page_button_template = Template('''<div class="page-button">$page</div>''')

-video_result_template = '''
-    <div class="medium-item">
-        <a class="video-thumbnail-box" href="$video_url" title="$video_title">
-            <img class="video-thumbnail-img" src="$thumbnail_url">
-            <span class="video-duration">$length</span>
-        </a>
-
-        <a class="title" href="$video_url">$video_title</a>
-
-        <address>Uploaded by <a href="$uploader_channel_url">$uploader</a></address>
-        <span class="views">$views</span>
-
-
-        <time datetime="$datetime">Uploaded $upload_date</time>
-
-        <span class="description">$description</span>
-    </div>
-'''
-
-
 # Sort: 1
     # Upload date: 2
@@ -62,22 +42,7 @@ features = {

 def page_number_to_sp_parameter(page):
     offset = (int(page) - 1)*20    # 20 results per page
-    first_byte = 255 & offset
-    second_byte = 255 & (offset >> 7)
-    second_byte = second_byte | 1
-
-    # 0b01001000 is required, and is always the same.
-    # The next 2 bytes encode the offset in little endian order,
-    # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part
-    # of the offset. Instead, to get the number which the two bytes encode, that LSB
-    # of the second byte is combined with the most significant bit (MSB) of the first byte
-    # in a logical AND. Replace the two bits with the result of the AND to get the two little endian
-    # bytes that represent the offset.
-    # I figured this out by trial and error on the sp parameter. I don't know why it's done like this;
-    # perhaps it's just obfuscation.
-    param_bytes = bytes((0b01001000, first_byte, second_byte))
-    param_encoded = urllib.parse.quote(base64.urlsafe_b64encode(param_bytes))
-    return param_encoded
+    return base64.urlsafe_b64encode(proto.uint(9, offset) + proto.string(61, b'')).decode('ascii')

 def get_search_json(query, page):
     url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
@@ -94,48 +59,6 @@ def get_search_json(query, page):
     info = json.loads(content)
     return info

-"""def get_search_info(query, page):
-    result_info = dict()
-    info = get_bloated_search_info(query, page)
-
-    estimated_results = int(info[1]['response']['estimatedResults'])
-    estimated_pages = ceil(estimated_results/20)
-    result_info['estimated_results'] = estimated_results
-    result_info['estimated_pages'] = estimated_pages
-
-    result_info['results'] = []
-    # this is what you get when you hire H-1B's
-    video_list = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
-
-
-    for video_json_crap in video_list:
-        # they have a dictionary whose only content is another dictionary...
-        try:
-            type = list(video_json_crap.keys())[0]
-        except KeyError:
-            continue    #channelRenderer or playlistRenderer
-        '''description = ""
-        for text_run in video_json_crap["descriptionSnippet"]["runs"]:
-            if text_run.get("bold", False):
-                description += "<b>" + html.escape'''
-        try:
-            result_info['results'].append({
-                "title": video_json_crap["title"]["simpleText"],
-                "video_id": video_json_crap["videoId"],
-                "description": video_json_crap.get("descriptionSnippet",dict()).get('runs',[]),    # a list of text runs (formmated), rather than plain text
-                "thumbnail": get_thumbnail_url(video_json_crap["videoId"]),
-                "views_text": video_json_crap['viewCountText'].get('simpleText', None) or video_json_crap['viewCountText']['runs'][0]['text'],
-                "length_text": default_multi_get(video_json_crap, 'lengthText', 'simpleText', default=''),    # livestreams dont have a length
-                "uploader": video_json_crap['longBylineText']['runs'][0]['text'],
-                "uploader_url": URL_ORIGIN + video_json_crap['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
-                "published_time_text": default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
-
-            })
-        except KeyError:
-            print(video_json_crap)
-            raise
-    return result_info"""
-

 def page_buttons_html(page_start, page_end, current_page, query):
     result = ""
@@ -195,20 +118,6 @@ def get_search_page(query_string, parameters=()):
            )
            continue
        result_list_html += common.renderer_html(renderer, current_query_string=query_string)
-        '''type = list(result.keys())[0]
-        result = result[type]
-        if type == "showingResultsForRenderer":
-            url = URL_ORIGIN + "/search"
-            if len(parameters) > 0:
-                url += ';' + ';'.join(parameters)
-            url += '?' + '&'.join(key + '=' + ','.join(values) for key,values in qs_query.items())
-
-            result_list_html += showing_results_for_template.substitute(
-                corrected_query=common.format_text_runs(result['correctedQuery']['runs']),
-
-            )
-        else:
-            result_list_html += common.html_functions[type](result)'''

    page = int(page)
    if page <= 5:
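Why the one-line replacement works: YouTube's sp parameter is a URL-safe base64 encoding of a small protobuf message. Read through that lens, the hard-coded 0b01001000 in the removed encoder is exactly the wire-format tag for field 9 as a varint ((9 << 3) | 0 == 72), and the "strange" split of the offset across two bytes that the old comment describes is ordinary varint encoding. The sketch below is a minimal, self-contained reimplementation of what the youtube.proto helpers are assumed to do; only the call signatures proto.uint(9, offset) and proto.string(61, b'') come from the diff, the function bodies here are illustrative, not the project's actual code.

import base64

def varint(value):
    # Protobuf varint: 7 bits per byte, least-significant group first,
    # continuation bit (0x80) set on every byte except the last.
    out = bytearray()
    while True:
        byte = value & 0x7f
        value >>= 7
        if value:
            out.append(byte | 0x80)
        else:
            out.append(byte)
            return bytes(out)

def uint(field_number, value):
    # Wire type 0 (varint). For field 9 the tag byte is (9 << 3) | 0 == 0b01001000,
    # the "magic" constant the removed hand-rolled encoder hard-coded.
    return varint(field_number << 3) + varint(value)

def string(field_number, data):
    # Wire type 2 (length-delimited): tag, then length, then the raw bytes.
    return varint((field_number << 3) | 2) + varint(len(data)) + data

def page_number_to_sp_parameter(page):
    offset = (int(page) - 1) * 20    # 20 results per page, as in the diff
    return base64.urlsafe_b64encode(uint(9, offset) + string(61, b'')).decode('ascii')

print(page_number_to_sp_parameter(2))    # page 2 -> offset 20 -> 'SBTqAwA='

One behavioral difference worth noting: the old encoder ran its result through urllib.parse.quote, while the new expression returns the bare base64 text, so any percent-escaping of the trailing '=' padding now has to happen wherever the parameter is inserted into the URL.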