yt_dlp/extractor/nate.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

import itertools

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    str_or_none,
    traverse_obj,
    unified_strdate,
)


class NateIE(InfoExtractor):
    _VALID_URL = r'https?://tv\.nate\.com/clip/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://tv.nate.com/clip/1848976',
        'info_dict': {
            'id': '1848976',
            'ext': 'mp4',
            'title': '[결승 오프닝 타이틀] 2018 LCK 서머 스플릿 결승전 kt Rolster VS Griffin',
            'description': 'md5:e1b79a7dcf0d8d586443f11366f50e6f',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20180908',
            'age_limit': 15,
            'duration': 73,
            'uploader': '2018 LCK 서머 스플릿(롤챔스)',
            'channel': '2018 LCK 서머 스플릿(롤챔스)',
            'channel_id': '3606',
            'uploader_id': '3606',
            'tags': 'count:59',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://tv.nate.com/clip/4300566',
        'info_dict': {
            'id': '4300566',
            'ext': 'mp4',
            'title': '[심쿵엔딩] 이준호x이세영, 서로를 기억하며 끌어안는 두 사람!💕, MBC 211204 방송',
            'description': 'md5:be1653502d9c13ce344ddf7828e089fa',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20211204',
            'age_limit': 15,
            'duration': 201,
            'uploader': '옷소매 붉은 끝동',
            'channel': '옷소매 붉은 끝동',
            'channel_id': '27987',
            'uploader_id': '27987',
            'tags': 'count:20',
        },
        'params': {'skip_download': True}
    }]

    _QUALITY = {
        '36': 2160,
        '35': 1080,
        '34': 720,
        '33': 480,
        '32': 360,
        '31': 270,
    }

    def _real_extract(self, url):
        id = self._match_id(url)
        video_data = self._download_json(f'https://tv.nate.com/api/v1/clip/{id}', id)
        formats = [{
            'format_id': f_url[-2:],
            'url': f_url,
            'height': self._QUALITY.get(f_url[-2:]),
            'quality': int_or_none(f_url[-2:]),
        } for f_url in video_data.get('smcUriList') or []]
        self._sort_formats(formats)
        return {
            'id': id,
            'title': video_data.get('clipTitle'),
            'description': video_data.get('synopsis'),
            'thumbnail': video_data.get('contentImg'),
            'upload_date': unified_strdate(traverse_obj(video_data, 'broadDate', 'regDate')),
            'age_limit': video_data.get('targetAge'),
            'duration': video_data.get('playTime'),
            'formats': formats,
            'uploader': video_data.get('programTitle'),
            'channel': video_data.get('programTitle'),
            'channel_id': str_or_none(video_data.get('programSeq')),
            'uploader_id': str_or_none(video_data.get('programSeq')),
            'tags': video_data['hashTag'].split(',') if video_data.get('hashTag') else None,
        }


class NateProgramIE(InfoExtractor):
    _VALID_URL = r'https?://tv\.nate\.com/program/clips/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://tv.nate.com/program/clips/27987',
        'playlist_mincount': 191,
        'info_dict': {
            'id': '27987',
        },
    }, {
        'url': 'https://tv.nate.com/program/clips/3606',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '3606',
        },
    }]

    def _entries(self, id):
        for page_num in itertools.count(1):
            program_data = self._download_json(f'https://tv.nate.com/api/v1/program/{id}/clip/ranking?size=20&page={page_num}',
                                               id, note=f'Downloading page {page_num}')
            for clip in program_data.get('content') or []:
                clip_id = clip.get('clipSeq')
                if clip_id:
                    yield self.url_result(
                        'https://tv.nate.com/clip/%s' % clip_id,
                        ie=NateIE.ie_key(), video_id=clip_id)
            if program_data.get('last'):
                break

    def _real_extract(self, url):
        id = self._match_id(url)
        return self.playlist_result(self._entries(id), playlist_id=id)