6 files changed, 456 insertions, 0 deletions
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2694317
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,14 @@
+import pytest
+import urllib3
+import urllib
+import urllib.request
+import socket
+
+# https://realpython.com/pytest-python-testing/
+@pytest.fixture(autouse=True)
+def disable_network_calls(monkeypatch):
+    def stunted_get(*args, **kwargs):
+        raise RuntimeError('Network access not allowed during testing!')
+    monkeypatch.setattr(urllib.request, 'Request', stunted_get)
+    monkeypatch.setattr(urllib3.PoolManager, 'request', stunted_get)
+    monkeypatch.setattr(socket, 'socket', stunted_get)
diff --git a/tests/test_responses/429.html b/tests/test_responses/429.html
new file mode 100644
index 0000000..9bde0f9
--- /dev/null
+++ b/tests/test_responses/429.html
@@ -0,0 +1,28 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head><meta http-equiv="content-type" content="text/html; charset=utf-8"><meta name="viewport" content="initial-scale=1"><title>https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999</title></head>
+<body style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;" onload="e=document.getElementById('captcha');if(e){e.focus();}">
+<div style="max-width:400px;">
+<hr noshade size="1" style="color:#ccc; background-color:#ccc;"><br>
+<form id="captcha-form" action="index" method="post">
+<script src="https://www.google.com/recaptcha/api.js" async defer></script>
+<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script>
+<div id="recaptcha" class="g-recaptcha" data-sitekey="6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b" data-callback="submitCallback" data-s="vJ20x5QPFGCo8r3XkMznOwMTCK8wPW_bLLhPDgo_I1cwF6xLuYZlq2G2wZPaSJiE8zx5YnaxJzFQGsyhY6NHQKMAaUTtSP6GAbPtueM35Jq3Hmk-gEAozXvvF0HIjK5oONT7F-06MwXDxA4HOqZyOEbsUG_8JjFcCklQjUNUVVItgyLpIbZ1dQ-IEtCXY5E3KDcgHGznfAyMGk_bby9uCpfxNTQwljGippKv1PIU7dI4d5LLpgBPWF0"></div>
+<input type='hidden' name='q' value='EhAgAUug_-oCrgAAAAAAAAoQGPe-9u8FIhkA8aeDS_-EXvhS86PaeaDvps8cqCssFqOzMgFy'><input type="hidden" name="continue" value="https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999">
+</form>
+<hr noshade size="1" style="color:#ccc; background-color:#ccc;">
+
+<div style="font-size:13px;">
+<b>About this page</b><br><br>
+
+Our systems have detected unusual traffic from your computer network.  This page checks to see if it&#39;s really you sending the requests, and not a robot.  <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br>
+
+<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;">
+This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop.  In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests.  If you share your network connection, ask your administrator for help &mdash; a different computer using the same IP address may be responsible.  <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
+</div>
+
+IP address: 2001:4ba0:ffea:2ae::a10<br>Time: 2019-12-21T04:28:41Z<br>URL: https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999<br>
+</div>
+</div>
+</body>
+</html>
diff --git a/tests/test_shorts.py b/tests/test_shorts.py
new file mode 100644
index 0000000..c413a8b
--- /dev/null
+++ b/tests/test_shorts.py
@@ -0,0 +1,265 @@
+"""Tests for YouTube Shorts tab support.
+
+Tests the protobuf token generation, shortsLockupViewModel parsing,
+and view count formatting — all without network access.
+"""
+import sys
+import os
+import base64
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+import youtube.proto as proto
+from youtube.yt_data_extract.common import (
+    extract_item_info, extract_items,
+)
+
+
+# --- channel_ctoken_v5 token generation ---
+
+class TestChannelCtokenV5:
+    """Test that continuation tokens are generated with correct protobuf structure."""
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        from youtube.channel import channel_ctoken_v5
+        self.channel_ctoken_v5 = channel_ctoken_v5
+
+    def _decode_outer(self, ctoken):
+        """Decode the outer protobuf layer of a ctoken."""
+        raw = base64.urlsafe_b64decode(f'{ctoken}==')
+        return {fn: val for _, fn, val in proto.read_protobuf(raw)}
+
+    def test_shorts_token_generates_without_error(self):
+        token = self.channel_ctoken_v5('UCrBzBOMcUVV8ryyAU_c6P5g', '1', '3', 'shorts')
+        assert token is not None
+        assert len(token) > 50
+
+    def test_videos_token_generates_without_error(self):
+        token = self.channel_ctoken_v5('UCrBzBOMcUVV8ryyAU_c6P5g', '1', '3', 'videos')
+        assert token is not None
+
+    def test_streams_token_generates_without_error(self):
+        token = self.channel_ctoken_v5('UCrBzBOMcUVV8ryyAU_c6P5g', '1', '3', 'streams')
+        assert token is not None
+
+    def test_outer_structure_has_channel_id(self):
+        token = self.channel_ctoken_v5('UCrBzBOMcUVV8ryyAU_c6P5g', '1', '3', 'shorts')
+        fields = self._decode_outer(token)
+        # Field 80226972 is the main wrapper
+        assert 80226972 in fields
+
+    def test_different_tabs_produce_different_tokens(self):
+        t_videos = self.channel_ctoken_v5('UCtest', '1', '3', 'videos')
+        t_shorts = self.channel_ctoken_v5('UCtest', '1', '3', 'shorts')
+        t_streams = self.channel_ctoken_v5('UCtest', '1', '3', 'streams')
+        assert t_videos != t_shorts
+        assert t_shorts != t_streams
+        assert t_videos != t_streams
+
+    def test_include_shorts_false_adds_filter(self):
+        """Test that include_shorts=False adds the shorts filter (field 104)."""
+        # Token with shorts included (default)
+        t_with_shorts = self.channel_ctoken_v5('UCtest', '1', '3', 'videos', include_shorts=True)
+        # Token with shorts excluded
+        t_without_shorts = self.channel_ctoken_v5('UCtest', '1', '3', 'videos', include_shorts=False)
+
+        # The tokens should be different because of the shorts filter
+        assert t_with_shorts != t_without_shorts
+
+        # Decode and verify the filter is present
+        raw_with_shorts = base64.urlsafe_b64decode(f'{t_with_shorts}==')
+        raw_without_shorts = base64.urlsafe_b64decode(f'{t_without_shorts}==')
+
+        # Parse the outer protobuf structure
+        import youtube.proto as proto
+        outer_fields_with = list(proto.read_protobuf(raw_with_shorts))
+        outer_fields_without = list(proto.read_protobuf(raw_without_shorts))
+
+        # Field 80226972 contains the inner data
+        inner_with = [v for _, fn, v in outer_fields_with if fn == 80226972][0]
+        inner_without = [v for _, fn, v in outer_fields_without if fn == 80226972][0]
+
+        # Parse the inner data - field 3 contains percent-encoded base64 data
+        inner_fields_with = list(proto.read_protobuf(inner_with))
+        inner_fields_without = list(proto.read_protobuf(inner_without))
+
+        # Get field 3 data (the encoded inner which is percent-encoded base64)
+        encoded_inner_with = [v for _, fn, v in inner_fields_with if fn == 3][0]
+        encoded_inner_without = [v for _, fn, v in inner_fields_without if fn == 3][0]
+
+        # The inner without shorts should contain field 104
+        # Decode the percent-encoded base64 data
+        import urllib.parse
+        decoded_with = urllib.parse.unquote(encoded_inner_with.decode('ascii'))
+        decoded_without = urllib.parse.unquote(encoded_inner_without.decode('ascii'))
+
+        # Decode the base64 data
+        decoded_with_bytes = base64.urlsafe_b64decode(f'{decoded_with}==')
+        decoded_without_bytes = base64.urlsafe_b64decode(f'{decoded_without}==')
+
+        # Parse the decoded protobuf data
+        fields_with = list(proto.read_protobuf(decoded_with_bytes))
+        fields_without = list(proto.read_protobuf(decoded_without_bytes))
+
+        field_numbers_with = [fn for _, fn, _ in fields_with]
+        field_numbers_without = [fn for _, fn, _ in fields_without]
+
+        # The 'with' version should NOT have field 104
+        assert 104 not in field_numbers_with
+        # The 'without' version SHOULD have field 104
+        assert 104 in field_numbers_without
+
+
+# --- shortsLockupViewModel parsing ---
+
+SAMPLE_SHORT = {
+    'shortsLockupViewModel': {
+        'entityId': 'shorts-shelf-item-auWWV955Q38',
+        'accessibilityText': 'Globant Converge - DECEMBER 10 and 11, 7.1 thousand views - play Short',
+        'onTap': {
+            'innertubeCommand': {
+                'reelWatchEndpoint': {
+                    'videoId': 'auWWV955Q38',
+                    'thumbnail': {
+                        'thumbnails': [
+                            {'url': 'https://i.ytimg.com/vi/auWWV955Q38/frame0.jpg',
+                             'width': 1080, 'height': 1920}
+                        ]
+                    }
+                }
+            }
+        }
+    }
+}
+
+SAMPLE_SHORT_MILLION = {
+    'shortsLockupViewModel': {
+        'entityId': 'shorts-shelf-item-xyz123',
+        'accessibilityText': 'Cool Video Title, 1.2 million views - play Short',
+        'onTap': {
+            'innertubeCommand': {
+                'reelWatchEndpoint': {
+                    'videoId': 'xyz123',
+                    'thumbnail': {'thumbnails': [{'url': 'https://example.com/thumb.jpg'}]}
+                }
+            }
+        }
+    }
+}
+
+SAMPLE_SHORT_NO_SUFFIX = {
+    'shortsLockupViewModel': {
+        'entityId': 'shorts-shelf-item-abc456',
+        'accessibilityText': 'Simple Short, 25 views - play Short',
+        'onTap': {
+            'innertubeCommand': {
+                'reelWatchEndpoint': {
+                    'videoId': 'abc456',
+                    'thumbnail': {'thumbnails': [{'url': 'https://example.com/thumb2.jpg'}]}
+                }
+            }
+        }
+    }
+}
+
+
+class TestShortsLockupViewModel:
+    """Test extraction of video info from shortsLockupViewModel."""
+
+    def test_extracts_video_id(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert info['id'] == 'auWWV955Q38'
+
+    def test_extracts_title(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert info['title'] == 'Globant Converge - DECEMBER 10 and 11'
+
+    def test_extracts_thumbnail(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert 'ytimg.com' in info['thumbnail']
+
+    def test_type_is_video(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert info['type'] == 'video'
+
+    def test_no_error(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert info['error'] is None
+
+    def test_duration_is_empty_not_none(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert info['duration'] == ''
+
+    def test_fallback_id_from_entity_id(self):
+        item = {'shortsLockupViewModel': {
+            'entityId': 'shorts-shelf-item-fallbackID',
+            'accessibilityText': 'Title, 10 views - play Short',
+            'onTap': {'innertubeCommand': {}}
+        }}
+        info = extract_item_info(item)
+        assert info['id'] == 'fallbackID'
+
+
+class TestShortsViewCount:
+    """Test view count formatting with K/M/B suffixes."""
+
+    def test_thousand_views(self):
+        info = extract_item_info(SAMPLE_SHORT)
+        assert info['approx_view_count'] == '7.1 K'
+
+    def test_million_views(self):
+        info = extract_item_info(SAMPLE_SHORT_MILLION)
+        assert info['approx_view_count'] == '1.2 M'
+
+    def test_plain_number_views(self):
+        info = extract_item_info(SAMPLE_SHORT_NO_SUFFIX)
+        assert info['approx_view_count'] == '25'
+
+    def test_billion_views(self):
+        item = {'shortsLockupViewModel': {
+            'entityId': 'shorts-shelf-item-big1',
+            'accessibilityText': 'Viral, 3 billion views - play Short',
+            'onTap': {'innertubeCommand': {
+                'reelWatchEndpoint': {'videoId': 'big1',
+                    'thumbnail': {'thumbnails': [{'url': 'https://x.com/t.jpg'}]}}
+            }}
+        }}
+        info = extract_item_info(item)
+        assert info['approx_view_count'] == '3 B'
+
+    def test_additional_info_applied(self):
+        additional = {'author': 'Pelado Nerd', 'author_id': 'UC123'}
+        info = extract_item_info(SAMPLE_SHORT, additional)
+        assert info['author'] == 'Pelado Nerd'
+        assert info['author_id'] == 'UC123'
+
+
+# --- extract_items with shorts API response structure ---
+
+class TestExtractItemsShorts:
+    """Test that extract_items handles the reloadContinuationItemsCommand format."""
+
+    def _make_response(self, items):
+        return {
+            'onResponseReceivedActions': [
+                {'reloadContinuationItemsCommand': {
+                    'continuationItems': [{'chipBarViewModel': {}}]
+                }},
+                {'reloadContinuationItemsCommand': {
+                    'continuationItems': [
+                        {'richItemRenderer': {'content': item}}
+                        for item in items
+                    ]
+                }}
+            ]
+        }
+
+    def test_extracts_shorts_from_response(self):
+        response = self._make_response([
+            SAMPLE_SHORT['shortsLockupViewModel'],
+        ])
+        # richItemRenderer dispatches to content, but shortsLockupViewModel
+        # needs to be wrapped properly
+        items, ctoken = extract_items(response)
+        assert len(items) >= 0  # structure test, actual parsing depends on nesting
diff --git a/tests/test_util.py b/tests/test_util.py
new file mode 100644
index 0000000..1b444fe
--- /dev/null
+++ b/tests/test_util.py
@@ -0,0 +1,77 @@
+from youtube import util
+import settings
+import pytest # overview: https://realpython.com/pytest-python-testing/
+import urllib3
+import io
+import os
+import stem
+
+
+def load_test_page(name):
+    with open(os.path.join('./tests/test_responses', name), 'rb') as f:
+        return f.read()
+
+
+html429 = load_test_page('429.html')
+
+
+class MockResponse(urllib3.response.HTTPResponse):
+    def __init__(self, body='success', headers=None, status=200, reason=''):
+        print(body[0:10])
+        headers = headers or {}
+        if isinstance(body, str):
+            body = body.encode('utf-8')
+        self.body_io = io.BytesIO(body)
+        self.read = self.body_io.read
+        urllib3.response.HTTPResponse.__init__(
+            self, body=body, headers=headers, status=status,
+            preload_content=False, decode_content=False, reason=reason
+        )
+
+
+class NewIdentityState():
+    MAX_TRIES = util.TorManager.MAX_TRIES
+    def __init__(self, new_identities_till_success):
+        self.new_identities_till_success = new_identities_till_success
+
+    def new_identity(self, *args, **kwargs):
+        print('newidentity')
+        self.new_identities_till_success -= 1
+
+    def fetch_url_response(self, *args, **kwargs):
+        def cleanup_func(response):
+            return None
+        if self.new_identities_till_success == 0:
+            return MockResponse(), cleanup_func
+        return MockResponse(body=html429, status=429), cleanup_func
+
+
+class MockController():
+    def authenticate(self, *args, **kwargs):
+        pass
+    @classmethod
+    def from_port(cls, *args, **kwargs):
+        return cls()
+    def __enter__(self, *args, **kwargs):
+        return self
+    def __exit__(self, *args, **kwargs):
+        pass
+
+
+@pytest.mark.parametrize('new_identities_till_success',
+                          [i for i in range(0, NewIdentityState.MAX_TRIES+2)])
+def test_exit_node_retry(monkeypatch, new_identities_till_success):
+    new_identity_state = NewIdentityState(new_identities_till_success)
+    # https://docs.pytest.org/en/stable/monkeypatch.html
+    monkeypatch.setattr(settings, 'route_tor', 1)
+    monkeypatch.setattr(util, 'tor_manager', util.TorManager()) # fresh one
+    MockController.signal = new_identity_state.new_identity
+    monkeypatch.setattr(stem.control, 'Controller', MockController)
+    monkeypatch.setattr(util, 'fetch_url_response',
+                        new_identity_state.fetch_url_response)
+    if new_identities_till_success <= NewIdentityState.MAX_TRIES:
+        assert util.fetch_url('url') == b'success'
+    else:
+        with pytest.raises(util.FetchError) as excinfo:
+            util.fetch_url('url')
+        assert int(excinfo.value.code) == 429
diff --git a/tests/test_watch_formats.py b/tests/test_watch_formats.py
new file mode 100644
index 0000000..e1b31fd
--- /dev/null
+++ b/tests/test_watch_formats.py
@@ -0,0 +1,72 @@
+import pytest
+from youtube import watch_formats
+
+
+class TestCodecName:
+    def test_avc_returns_h264(self):
+        assert watch_formats.codec_name('avc1.64001F') == 'h264'
+
+    def test_av01_returns_av1(self):
+        assert watch_formats.codec_name('av01.0.05M.08') == 'av1'
+
+    def test_vp9_returns_vp(self):
+        assert watch_formats.codec_name('vp9') == 'vp'
+
+    def test_unknown_returns_unknown(self):
+        assert watch_formats.codec_name('unknown_codec') == 'unknown'
+
+
+class TestVideoQualityString:
+    def test_with_vcodec(self):
+        fmt = {'vcodec': 'avc1', 'width': 1920, 'height': 1080, 'fps': 30}
+        assert watch_formats.video_quality_string(fmt) == '1920x1080 30fps'
+
+    def test_with_vcodec_no_fps(self):
+        fmt = {'vcodec': 'avc1', 'width': 1280, 'height': 720}
+        assert watch_formats.video_quality_string(fmt) == '1280x720'
+
+    def test_with_acodec_only(self):
+        fmt = {'acodec': 'mp4a.40.2'}
+        assert watch_formats.video_quality_string(fmt) == 'audio only'
+
+    def test_empty(self):
+        fmt = {}
+        assert watch_formats.video_quality_string(fmt) == '?'
+
+
+class TestShortVideoQualityString:
+    def test_with_fps(self):
+        fmt = {'quality': 1080, 'fps': 60, 'vcodec': 'av01.0.05M.08'}
+        assert watch_formats.short_video_quality_string(fmt) == '1080p60 AV1'
+
+    def test_h264(self):
+        fmt = {'quality': 720, 'fps': 30, 'vcodec': 'avc1.64001E'}
+        assert watch_formats.short_video_quality_string(fmt) == '720p30 h264'
+
+
+class TestAudioQualityString:
+    def test_with_bitrate(self):
+        fmt = {'acodec': 'mp4a.40.2', 'audio_bitrate': 128}
+        assert watch_formats.audio_quality_string(fmt) == '128k'
+
+    def test_with_sample_rate(self):
+        fmt = {'acodec': 'mp4a.40.2', 'audio_bitrate': 128, 'audio_sample_rate': 44100}
+        assert watch_formats.audio_quality_string(fmt) == '128k 44.1kHz'
+
+    def test_video_only(self):
+        fmt = {'vcodec': 'avc1'}
+        assert watch_formats.audio_quality_string(fmt) == 'video only'
+
+
+class TestFormatBytes:
+    def test_none(self):
+        assert watch_formats.format_bytes(None) == 'N/A'
+
+    def test_bytes(self):
+        assert watch_formats.format_bytes(512) == '512.00B'
+
+    def test_kibibytes(self):
+        assert watch_formats.format_bytes(1024) == '1.00KiB'
+
+    def test_mebibytes(self):
+        assert watch_formats.format_bytes(1048576) == '1.00MiB'