(chore): bumped up to 1.1.6

(refactor): improved caption track language tagging
(refactor): improved formatted and raw captions info
2026-02-04 18:22:23 +05:30 · 2025-01-21 23:35:03 +05:30 · 2025-01-21 23:31:35 +05:30 · 2025-01-21 22:30:28 +05:30 · 2025-01-21 20:40:34 +05:30 · 2025-01-21 19:45:38 +05:30
6 changed files with 57 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -111,13 +111,13 @@ pytubepp "https://youtube.com/watch?v=2lAe1cqCOXo" -i
 | Flag | Usage | Requires Parameter | Requires URL | Parameters | Default |
 | :--- | :---  | :---               | :---         | :---       | :---    |
 | -s | Choose preferred download stream | YES | YES | `144` `144p` `240` `240p` `360` `360p` `480` `480p` `720` `720p` `hd` `1080` `1080p` `fhd` `1440` `1440p` `2k` `2160` `2160p` `4k` `4320` `4320p` `8k` `mp3` (Pass any one of them) | Your chosen Default Stream via `-ds` flag |
-| -c | Choose preferred caption | YES | YES | All [ISO 639-1 Language Codes](https://www.w3schools.com/tags/ref_language_codes.asp) + some others (Pass any one of them) + `none` for No Caption eg: `en` for English | Your chosen Default Caption via `-dc` flag |
+| -c | Choose preferred caption | YES | YES | All [ISO 639-1 Language Codes](https://www.w3schools.com/tags/ref_language_codes.asp) + auto generated ones + `none` for No Caption (Pass any one of them) eg: `en` for English | Your chosen Default Caption via `-dc` flag |
 | -i | Shows the video information like: Title, Author, Views, Publication Date, Duration, Available Download Streams | NO | YES | No parameters | No default |
 | -ls | Lists all available streams (video, audio, caption) (only for debugging purposes) | NO | YES | No parameters | No default |
 | -ri | Shows the video information in raw json format | NO | YES | No parameters | No default |
 | -jp | Shows raw json output in prettified view (with indentation: 4) (primarily used with -ri flag)| NO | YES | No parameters | No default |
 | -ds | Set default download stream | YES | NO | `144p` `240p` `360p` `480p` `720p` `1080p` `1440p` `2160p` `4320p` `mp3` `max` (Pass any one of them) | `max` |
-| -dc | Set default caption | YES | NO | All [ISO 639-1 Language Codes](https://www.w3schools.com/tags/ref_language_codes.asp) + some others + `none` for No Caption (Pass any one of them) eg: `en` for English | `none` |
+| -dc | Set default caption | YES | NO | All [ISO 639-1 Language Codes](https://www.w3schools.com/tags/ref_language_codes.asp) + auto generated ones + `none` for No Caption (Pass any one of them) eg: `en` for English | `none` |
 | -df | Set custom download folder path | YES | NO | Use the full path excluding the last trailing slash within double quotes eg(in Linux): `"/path/to/folder"` (Make sure the folder path you entered is already created and accessible) | Within `PytubePP Downloads` folder in your System's `Downloads` folder |
 | -r | Reset to default configuration (Download Folder, Default Stream) | NO | NO | No parameters | No default |
 | -sc | Show all current user configurations | NO | NO | No parameters | No default |
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pytubepp"
-version = "1.1.5"
+version = "1.1.6"
 authors = [
  { name="Subhamoy Biswas", email="hey@neosubhamoy.com" },
 ]
--- a/pytubepp/download.py
+++ b/pytubepp/download.py
@@ -1,6 +1,6 @@
 from tqdm import tqdm
 from .config import get_temporary_directory, load_config
-from .utils import get_unique_filename, postprocess_cleanup
+from .utils import get_unique_filename, postprocess_cleanup, unpack_caption
 import os, re, requests, shutil, sys, random, ffmpy
 userConfig = load_config()
@@ -21,6 +21,7 @@ def download_progressive(stream, itag, title, resolution, file_extention, captio
    if caption_code:
        print(f'Downloading Caption ({caption_code})...')
        caption = captions[caption_code]
        _, caption_lang = unpack_caption(caption)
        caption_file = os.path.join(tempDIR, random_filename + '_cap.srt')
        caption.save_captions(caption_file)
        print('Processing...')
@@ -28,7 +29,7 @@ def download_progressive(stream, itag, title, resolution, file_extention, captio
        output_temp_file_with_subs = os.path.join(tempDIR, random_filename + '_merged.' + file_extention)
        ff = ffmpy.FFmpeg(
            inputs={output_temp_file: None},
-            outputs={output_temp_file_with_subs: ['-i', caption_file, '-c', 'copy', '-c:s', 'mov_text', '-metadata:s:s:0', f'language={caption_code}', '-metadata:s:s:0', f'title={caption_code}', '-metadata:s:s:0', f'handler_name={caption_code}']}
+            outputs={output_temp_file_with_subs: ['-i', caption_file, '-c', 'copy', '-c:s', 'mov_text', '-metadata:s:s:0', f'language={caption_code}', '-metadata:s:s:0', f'title={caption_lang}', '-metadata:s:s:0', f'handler_name={caption_lang}']}
        )
        ff.run(stdout=devnull, stderr=devnull)
        devnull.close()
--- a/pytubepp/main.py
+++ b/pytubepp/main.py
@@ -3,7 +3,7 @@ from tabulate import tabulate
 from .config import get_temporary_directory, load_config, update_config, reset_config
 from .download import download_progressive, download_nonprogressive, download_audio, progress
 from .postprocess import merge_audio_video, convert_to_mp3
-from .utils import get_version, clear_temp_files, is_valid_url, network_available, ffmpeg_installed, nodejs_installed
+from .utils import get_version, clear_temp_files, is_valid_url, network_available, ffmpeg_installed, nodejs_installed, unpack_caption
 import appdirs, os, re, sys, argparse, json
 class YouTubeDownloader:
@@ -46,7 +46,7 @@ class YouTubeDownloader:
        if not nodejs_installed():
            print("\nWarning: Node.js is not installed or not found in PATH!")
            print("BotGuard poToken generation will not work properly without Node.js environment")
-            print("Please install Node.js from https://nodejs.org/en/download\n")
+            print("Please install Node.js, read https://github.com/neosubhamoy/pytubepp#%EF%B8%8F-installation for instructions\n")
        if is_valid_url(link):
            link = is_valid_url(link).group(1)
@@ -151,9 +151,19 @@ class YouTubeDownloader:
                print('Sorry, No video streams found....!!!')
                sys.exit()
-            print(f'\nTitle: {self.video.title}\nAuthor: {self.author}\nPublished On: {self.video.publish_date.strftime("%d/%m/%Y")}\nDuration: {f"{self.video.length//3600:02}:{(self.video.length%3600)//60:02}:{self.video.length%60:02}" if self.video.length >= 3600 else f"{(self.video.length%3600)//60:02}:{self.video.length%60:02}"}\nViews: {self.views}\nCaptions: {[caption.code for caption in self.captions.keys()] or "Unavailable"}\n')
+            print(f'\nTitle: {self.video.title}\nAuthor: {self.author}\nPublished On: {self.video.publish_date.strftime("%d/%m/%Y")}\nDuration: {f"{self.video.length//3600:02}:{(self.video.length%3600)//60:02}:{self.video.length%60:02}" if self.video.length >= 3600 else f"{(self.video.length%3600)//60:02}:{self.video.length%60:02}"}\nViews: {self.views}\nCaptions: {"Available" if self.captions else "Unavailable"}')
            print('\n')
            print(tabulate(table, headers=['Stream', 'Alias (for -s flag)', 'Format', 'Size', 'FrameRate', 'V-Codec', 'A-Codec', 'V-BitRate', 'A-BitRate']))
            print('\n')
            if self.captions:
                caption_table = []
                for caption in self.captions:
                    cap_code, cap_lang = unpack_caption(caption)
                    caption_table.append([cap_lang, cap_code])
                print(tabulate(caption_table, headers=['Caption', 'CaptionCode (for -c flag)']))
                print('\n')
        else:
            print('\nInvalid video link! Please enter a valid video url...!!')
@@ -204,6 +214,15 @@ class YouTubeDownloader:
                print('Sorry, No video streams found....!!!')
                sys.exit()
            captions_list = []
            if self.captions:
                for caption in self.captions:
                    cap_code, cap_lang = unpack_caption(caption)
                    captions_list.append({
                        'code': cap_code,
                        'lang': cap_lang
                    })
            output = {
                'id': self.video.video_id,
                'title': self.video.title,
@@ -213,7 +232,7 @@ class YouTubeDownloader:
                'published_on': self.video.publish_date.strftime('%d/%m/%Y'),
                'duration': self.video.length,
                'streams': streams_list,
-                'captions': [caption.code for caption in self.captions.keys()] or None
+                'captions': captions_list or None
            }
            print(json.dumps(output, indent=4 if prettify else None))
@@ -391,8 +410,10 @@ def main():
        # Handle info display flags
        if args.show_info:
            print('Loading...')
            downloader.show_video_info(args.url)
        if args.list_stream:
            print('Loading...')
            downloader.show_all_streams(args.url)
        if args.raw_info:
            downloader.show_raw_info(args.url, args.json_prettify)
@@ -401,6 +422,7 @@ def main():
        # Handle download cases
        if hasattr(args, 'stream') and hasattr(args, 'caption'):
            print('Loading...')
            if downloader.set_video_info(args.url):
                if (args.caption not in downloader.captions.keys()) and (args.caption != 'none'):
                    print('\nInvalid caption code or caption not available! Please choose a different caption...!! (use -i to see available captions)')
@@ -420,6 +442,7 @@ def main():
                else:
                    downloader.download_stream(args.url, args.stream, args.caption)
        elif hasattr(args, 'stream'):
            print('Loading...')
            if downloader.set_video_info(args.url):
                if downloader.default_caption == 'none':
                    downloader.download_stream(args.url, args.stream)
@@ -446,6 +469,7 @@ def main():
                    else:
                        print('Download cancelled! exiting...!!')
        elif hasattr(args, 'caption'):
            print('Loading...')
            if downloader.set_video_info(args.url):
                if (args.caption not in downloader.captions.keys()) and (args.caption != 'none'):
                    print('\nInvalid caption code or caption not available! Please choose a different caption...!! (use -i to see available captions)')
@@ -498,6 +522,7 @@ def main():
                    else:
                        print('Sorry, No downloadable video stream found....!!!')
        elif not any([args.show_info, args.raw_info, args.json_prettify, args.list_stream]):  # If no info flags are set
            print('Loading...')
            if downloader.set_video_info(args.url):
                if downloader.default_stream == 'max' and downloader.maxres:
                    if downloader.default_caption == 'none':
@@ -587,8 +612,13 @@ def main():
        if hasattr(args, 'default_caption'):
            if args.default_caption != downloader.default_caption:
-                if not all(c.isalpha() or c in '.-' for c in args.default_caption) or len(args.default_caption) > 10:
+                if not (re.match(r'^[a-z]{2}(-[A-Za-z]+)?$', args.default_caption) or
-                    print('\nInvalid caption code! Only a-z, A-Z, dash (-) and dot (.) are allowed with maximum 10 characters...!!')
+                        re.match(r'^a\.[a-z]{2}(-[A-Za-z]+)?$', args.default_caption) or
                        re.match(r'^none$', args.default_caption)):
                    print('\nInvalid caption code! Allowed formats are:\n'
                        '- ISO 639-1 language codes (e.g: en, zh-Hans)\n'
                        '- Auto-generated variants: a.ISO639-1LanguageCode (e.g: a.en, a.zh-Hans)\n'
                        '- none\n')
                else:
                    update_config('defaultCaption', args.default_caption)
                    print(f'\nDefault caption updated to: {args.default_caption}')
--- a/pytubepp/postprocess.py
+++ b/pytubepp/postprocess.py
@@ -1,6 +1,6 @@
 from mutagen.id3 import ID3, APIC, TIT2, TPE1, TALB
 from .config import get_temporary_directory, load_config
-from .utils import get_unique_filename, postprocess_cleanup
+from .utils import get_unique_filename, postprocess_cleanup, unpack_caption
 from .download import download_thumbnail
 import os, shutil, ffmpy
@@ -17,6 +17,7 @@ def merge_audio_video(title, resolution, file_extention, random_filename, captio
    if caption_code:
        print(f'Downloading Caption ({caption_code})...')
        caption = captions[caption_code]
        _, caption_lang = unpack_caption(caption)
        srt_file = os.path.join(tempDIR, random_filename + '_cap.srt')
        caption.save_captions(srt_file)
        vtt_file = os.path.join(tempDIR, random_filename + '_cap.vtt')
@@ -38,7 +39,7 @@ def merge_audio_video(title, resolution, file_extention, random_filename, captio
        input_params = {video_file: None, audio_file: None}
        output_params = {output_temp_file: ['-i', subtitle_file, '-c:v', 'copy', '-c:a', 'copy', 
                        '-c:s', subtitle_codec, '-metadata:s:s:0', f'language={caption_code}',
-                        '-metadata:s:s:0', f'title={caption_code}', '-metadata:s:s:0', f'handler_name={caption_code}']}
+                        '-metadata:s:s:0', f'title={caption_lang}', '-metadata:s:s:0', f'handler_name={caption_lang}']}
        devnull = open(os.devnull, 'w')
        ff = ffmpy.FFmpeg(inputs=input_params, outputs=output_params)
--- a/pytubepp/utils.py
+++ b/pytubepp/utils.py
@@ -36,7 +36,7 @@ def get_version():
        return "Unknown"
 def is_valid_url(url):
-    match = re.search(r"(https?://(?:www\.|music\.)?youtube\.com/watch\?v=[^&]{11}|https?://youtu\.be/[^?&]*(\?si=[^&]*)?)", url)
+    match = re.search(r"(https?://(?:www\.|music\.)?youtube\.com/(?:watch\?v=[^&]{11}|shorts/[^?&]+)|https?://youtu\.be/[^?&]*(\?si=[^&]*)?)", url)
    return match
 def get_unique_filename(filename, directory=downloadDIR):
@@ -47,6 +47,17 @@ def get_unique_filename(filename, directory=downloadDIR):
        counter += 1
    return filename
 def unpack_caption(caption):
     """Extract the caption code and language name from a caption object.

     The values are parsed out of ``str(caption)``, which is expected to
     contain ``code="..."`` and ``lang="..."`` attributes.

     Args:
         caption: Any object whose string form carries the two attributes.

     Returns:
         A ``(code, lang)`` tuple of strings. A missing or unterminated
         attribute yields an empty string for that element instead of an
         arbitrary slice of the string.
     """
     caption_str = str(caption)

     def _attribute(name):
         # str.find() returns -1 on a miss; bail out rather than slicing
         # from a bogus offset (which used to return garbage text).
         marker = f'{name}="'
         start = caption_str.find(marker)
         if start == -1:
             return ''
         start += len(marker)
         end = caption_str.find('"', start)
         # No closing quote means the attribute is malformed, not a value.
         return caption_str[start:end] if end != -1 else ''

     return _attribute('code'), _attribute('lang')
 def postprocess_cleanup(dir, files, random_filename):
    for file in files:
        file_path = os.path.join(dir, random_filename + file)