From 79937c1c823f998a1d6bb324901fd13b483b3607 Mon Sep 17 00:00:00 2001
From: James Taylor
Date: Mon, 2 Jul 2018 17:45:25 -0700
Subject: fix line endings

---
 youtube/search.py | 460 +++++++++++++++++++++++++++---------------------------
 1 file changed, 230 insertions(+), 230 deletions(-)

diff --git a/youtube/search.py b/youtube/search.py
index 5268dbe..5982d9b 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -1,231 +1,231 @@

The patch deletes and re-adds every line of youtube/search.py, converting CRLF line endings to LF; the text of the lines is otherwise identical, so the resulting file is shown once. (The HTML markup inside the template strings did not survive rendering; only the placeholder text remains.)

import json
import urllib.parse
import html
from string import Template
import base64
from math import ceil
from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
import youtube.common as common

with open("yt_search_results_template.html", "r") as file:
    yt_search_results_template = file.read()

with open("yt_search_template.html", "r") as file:
    yt_search_template = file.read()

page_button_template = Template('''$page''')
current_page_button_template = Template('''$page''')
video_result_template = '''
    $length
    $video_title
    Uploaded by $uploader
    $views
    $description
'''


# Sort: 1
    # Upload date: 2
    # View count: 3
    # Rating: 1
# Offset: 9
# Filters: 2
    # Upload date: 1
    # Type: 2
    # Duration: 3


features = {
    '4k': 14,
    'hd': 4,
    'hdr': 25,
    'subtitles': 5,
    'creative_commons': 6,
    '3d': 7,
    'live': 8,
    'purchased': 9,
    '360': 15,
    'location': 23,
}

def page_number_to_sp_parameter(page):
    offset = (int(page) - 1)*20    # 20 results per page
    first_byte = 255 & offset
    second_byte = 255 & (offset >> 7)
    second_byte = second_byte | 1

    # 0b01001000 is required, and is always the same.
    # The next 2 bytes encode the offset in little endian order,
    # BUT it's done in a strange way. The least significant bit (LSB) of the
    # second byte is not part of the offset. Instead, to get the number which
    # the two bytes encode, that LSB of the second byte is combined with the
    # most significant bit (MSB) of the first byte in a logical AND. Replace
    # the two bits with the result of the AND to get the two little endian
    # bytes that represent the offset.
    # I figured this out by trial and error on the sp parameter. I don't know
    # why it's done like this; perhaps it's just obfuscation.
    param_bytes = bytes((0b01001000, first_byte, second_byte))
    param_encoded = urllib.parse.quote(base64.urlsafe_b64encode(param_bytes))
    return param_encoded
def get_search_json(query, page):
    url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
    headers = {
        'Host': 'www.youtube.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20180418',
    }
    url += "&pbj=1&sp=" + page_number_to_sp_parameter(page)
    content = common.fetch_url(url, headers=headers)
    info = json.loads(content)
    return info

"""def get_search_info(query, page):
    result_info = dict()
    info = get_bloated_search_info(query, page)

    estimated_results = int(info[1]['response']['estimatedResults'])
    estimated_pages = ceil(estimated_results/20)
    result_info['estimated_results'] = estimated_results
    result_info['estimated_pages'] = estimated_pages

    result_info['results'] = []
    # the video list is buried absurdly deep in the response
    video_list = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']

    for video_json_crap in video_list:
        # each entry is a dictionary whose only content is another dictionary
        try:
            type = list(video_json_crap.keys())[0]
        except IndexError:
            continue    # channelRenderer or playlistRenderer
        '''description = ""
        for text_run in video_json_crap["descriptionSnippet"]["runs"]:
            if text_run.get("bold", False):
                description += "" + html.escape'''
        try:
            result_info['results'].append({
                "title": video_json_crap["title"]["simpleText"],
                "video_id": video_json_crap["videoId"],
                "description": video_json_crap.get("descriptionSnippet", dict()).get('runs', []),    # a list of formatted text runs, rather than plain text
                "thumbnail": get_thumbnail_url(video_json_crap["videoId"]),
                "views_text": video_json_crap['viewCountText'].get('simpleText', None) or video_json_crap['viewCountText']['runs'][0]['text'],
                "length_text": default_multi_get(video_json_crap, 'lengthText', 'simpleText', default=''),    # livestreams don't have a length
                "uploader": video_json_crap['longBylineText']['runs'][0]['text'],
                "uploader_url": URL_ORIGIN + video_json_crap['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                "published_time_text": default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
            })
        except KeyError:
            print(video_json_crap)
            raise
    return result_info"""
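Both the commented-out draft above and the live code rely on common.default_multi_get for optional nested keys. The real helper lives in youtube/common.py; a compatible sketch, assuming only the call shape used in this file:

def default_multi_get(obj, *keys, default=None):
    # Walk nested dicts/lists; return `default` as soon as a key or index is missing.
    current = obj
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            return default
    return current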
def page_buttons_html(page_start, page_end, current_page, query):
    result = ""
    for page in range(page_start, page_end+1):
        if page == current_page:
            template = current_page_button_template
        else:
            template = page_button_template
        result += template.substitute(page=page, href=URL_ORIGIN + "/search?query=" + urllib.parse.quote_plus(query) + "&page=" + str(page))
    return result

showing_results_for = Template('''
Showing results for $corrected_query
Search instead for $original_query
''')
did_you_mean = Template('''
Did you mean $corrected_query
''')
def get_search_page(query_string, parameters=()):
    qs_query = urllib.parse.parse_qs(query_string)
    if len(qs_query) == 0:
        return yt_search_template
    query = qs_query["query"][0]
    page = qs_query.get("page", ["1"])[0]

    info = get_search_json(query, page)

    estimated_results = int(info[1]['response']['estimatedResults'])
    estimated_pages = ceil(estimated_results/20)
    results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']

    corrections = ''
    result_list_html = ""
    for renderer in results:
        type = list(renderer.keys())[0]
        if type == 'shelfRenderer':
            continue
        if type == 'didYouMeanRenderer':
            renderer = renderer[type]
            corrected_query_string = urllib.parse.parse_qs(query_string)
            corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
            corrected_query_url = URL_ORIGIN + '/search?' + common.make_query_string(corrected_query_string)
            corrections = did_you_mean.substitute(
                corrected_query_url = corrected_query_url,
                corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
            )
            continue
        if type == 'showingResultsForRenderer':
            renderer = renderer[type]
            no_autocorrect_query_string = urllib.parse.parse_qs(query_string)
            no_autocorrect_query_string['autocorrect'] = ['0']
            no_autocorrect_query_url = URL_ORIGIN + '/search?' + common.make_query_string(no_autocorrect_query_string)
            corrections = showing_results_for.substitute(
                corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
                original_query_url = no_autocorrect_query_url,
                original_query = html.escape(renderer['originalQuery']['simpleText']),
            )
            continue
        result_list_html += common.renderer_html(renderer, current_query_string=query_string)
    '''type = list(result.keys())[0]
    result = result[type]
    if type == "showingResultsForRenderer":
        url = URL_ORIGIN + "/search"
        if len(parameters) > 0:
            url += ';' + ';'.join(parameters)
        url += '?' + '&'.join(key + '=' + ','.join(values) for key,values in qs_query.items())

        result_list_html += showing_results_for_template.substitute(
            corrected_query=common.format_text_runs(result['correctedQuery']['runs']),
        )
    else:
        result_list_html += common.html_functions[type](result)'''
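    # Page-button window around the current page, e.g. (given enough
    # estimated pages): page 3 gets buttons 1-9, page 12 gets buttons 8-16.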
+''') +def get_search_page(query_string, parameters=()): + qs_query = urllib.parse.parse_qs(query_string) + if len(qs_query) == 0: + return yt_search_template + query = qs_query["query"][0] + page = qs_query.get("page", "1")[0] + + info = get_search_json(query, page) + + estimated_results = int(info[1]['response']['estimatedResults']) + estimated_pages = ceil(estimated_results/20) + results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] + + corrections = '' + result_list_html = "" + for renderer in results: + type = list(renderer.keys())[0] + if type == 'shelfRenderer': + continue + if type == 'didYouMeanRenderer': + renderer = renderer[type] + corrected_query_string = urllib.parse.parse_qs(query_string) + corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] + corrected_query_url = URL_ORIGIN + '/search?' + common.make_query_string(corrected_query_string) + corrections = did_you_mean.substitute( + corrected_query_url = corrected_query_url, + corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']), + ) + continue + if type == 'showingResultsForRenderer': + renderer = renderer[type] + no_autocorrect_query_string = urllib.parse.parse_qs(query_string) + no_autocorrect_query_string['autocorrect'] = ['0'] + no_autocorrect_query_url = URL_ORIGIN + '/search?' + common.make_query_string(no_autocorrect_query_string) + corrections = showing_results_for.substitute( + corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']), + original_query_url = no_autocorrect_query_url, + original_query = html.escape(renderer['originalQuery']['simpleText']), + ) + continue + result_list_html += common.renderer_html(renderer, current_query_string=query_string) + '''type = list(result.keys())[0] + result = result[type] + if type == "showingResultsForRenderer": + url = URL_ORIGIN + "/search" + if len(parameters) > 0: + url += ';' + ';'.join(parameters) + url += '?' + '&'.join(key + '=' + ','.join(values) for key,values in qs_query.items()) + + result_list_html += showing_results_for_template.substitute( + corrected_query=common.format_text_runs(result['correctedQuery']['runs']), + + ) + else: + result_list_html += common.html_functions[type](result)''' + + page = int(page) + if page <= 5: + page_start = 1 + page_end = min(9, estimated_pages) + else: + page_start = page - 4 + page_end = min(page + 4, estimated_pages) + + + result = Template(yt_search_results_template).substitute( + results = result_list_html, + page_title = query + " - Search", + search_box_value = html.escape(query), + number_of_results = '{:,}'.format(estimated_results), + number_of_pages = '{:,}'.format(estimated_pages), + page_buttons = page_buttons_html(page_start, page_end, page, query), + corrections = corrections + ) return result \ No newline at end of file -- cgit v1.2.3