Skip to content

y5gfunc.encode.subtitle

subtitle

Functions:

Name Description
subset_fonts

Create a subset of fonts containing only the glyphs used by specified ASS files.

extract_pgs_subtitles

Extract all PGS (HDMV PGS) subtitle tracks from an M2TS file.

subset_fonts

Create a subset of fonts containing only the glyphs used by specified ASS files.

Parameters:

Name Type Description Default

ass_path

Union[list[Union[str, Path]], str, Path]

Path to a single ASS file or a list of paths to ASS files.

required

fonts_path

Union[str, Path]

Path to the directory containing the original font files.

required

output_directory

Union[str, Path]

Path to the directory where the subsetted fonts will be saved. This directory will be created if it doesn't exist.

required

Returns:

Type Description
Path

The Path object representing the output directory.

Raises:

Type Description
RuntimeError

If the 'assfonts' command fails (returns a non-zero exit code) or if there are warnings or errors.

Source code in y5gfunc/encode/subtitle.py
def subset_fonts(
    ass_path: Union[list[Union[str, Path]], str, Path],
    fonts_path: Union[str, Path],
    output_directory: Union[str, Path],
) -> Path:
    """
    Create a subset of fonts containing only the glyphs used by specified ASS files.

    Runs the external ``assfonts`` command with one ``-i`` argument per ASS file,
    the font directory passed via ``-f`` and the output directory passed via ``-o``.

    Args:
        ass_path: Path to a single ASS file or a list of paths to ASS files.
        fonts_path: Path to the directory containing the original font files.
        output_directory: Path to the directory where the subsetted fonts will be saved. This directory will be created if it doesn't exist.

    Returns:
        The Path object representing the output directory.

    Raises:
        RuntimeError: If the 'assfonts' command fails (returns a non-zero exit code) or if there are warnings or errors.
    """
    # Normalise the single-path form so the rest of the function only deals with a list.
    if isinstance(ass_path, (str, Path)):
        ass_path = [ass_path]

    ass_paths = [resolve_path(path) for path in ass_path]
    fonts_path = resolve_path(fonts_path)
    output_directory = resolve_path(output_directory)

    # Honour the documented contract: the output directory exists before assfonts runs.
    output_directory.mkdir(parents=True, exist_ok=True)

    subtitle_command = ["assfonts"]
    for path in ass_paths:
        subtitle_command += ["-i", str(path)]

    subtitle_command += ["-r", "-c", "-f", str(fonts_path), "-o", str(output_directory)]

    process = subprocess.run(subtitle_command, capture_output=True, text=True)
    if process.returncode != 0:
        # Report both streams: a hard failure may only be explained on stderr.
        details = "\n".join(
            part for part in (process.stdout, process.stderr) if part
        )
        raise RuntimeError(f"subset_fonts: assfonts failed: {details}")

    # A zero exit code is not enough: warnings/errors logged on stdout are also treated as failure.
    if any(marker in process.stdout for marker in ("[WARN]", "[ERROR]")):
        raise RuntimeError(f"subset_fonts: assfonts failed: {process.stdout}")

    return output_directory

extract_pgs_subtitles

extract_pgs_subtitles(m2ts_path: Union[str, Path], output_dir: Optional[Union[str, Path]] = None) -> list[dict[str, Union[str, Path, bool]]]

Extract all PGS (HDMV PGS) subtitle tracks from an M2TS file.

Parameters:

Name Type Description Default

m2ts_path

Union[str, Path]

Path to the input M2TS file.

required

output_dir

Optional[Union[str, Path]]

Optional path to the directory where extracted .sup files will be saved. If None, defaults to a subdirectory named '{m2ts_file_stem}_subs' next to the input M2TS file. The directory will be created if it doesn't exist.

None

Returns:

Type Description
list[dict[str, Union[str, Path, bool]]]

A list of dictionaries, where each dictionary represents a successfully extracted pgs subtitle and contains keys suitable for use in muxing functions.

Raises:

Type Description
RuntimeError

If ffprobe fails, if tsMuxeR fails, if no PGS subtitles are found, or if an expected demuxed subtitle file is not found after tsMuxeR runs.

Source code in y5gfunc/encode/subtitle.py
def extract_pgs_subtitles(
    m2ts_path: Union[str, Path], output_dir: Optional[Union[str, Path]] = None
) -> list[dict[str, Union[str, Path, bool]]]:
    """
    Extract all PGS (HDMV PGS) subtitle tracks from an M2TS file.

    The file is probed with ``ffprobe`` to find ``hdmv_pgs_subtitle`` streams, a
    tsMuxeR meta file listing those tracks is written to a temporary directory,
    ``tsmuxer`` is run to demux them, and each resulting ``.sup`` file is moved
    to ``output_dir`` as ``track_{track_id}_{language}.sup``.

    Args:
        m2ts_path: Path to the input M2TS file.
        output_dir: Optional path to the directory where extracted .sup files will be saved. If None, defaults to a subdirectory
                    named '{m2ts_file_stem}_subs' next to the input M2TS file. The directory will be created if it doesn't exist.

    Returns:
        A list of dictionaries, one per extracted PGS subtitle, each with the keys
        ``"path"`` (final .sup path), ``"language"`` and ``"default"``.

    Raises:
        RuntimeError: If ffprobe fails, if tsMuxeR fails, if no PGS subtitles are found, or if an expected demuxed subtitle file is not found after tsMuxeR runs.
    """

    m2ts_path = resolve_path(m2ts_path)

    if output_dir is None:
        output_dir = m2ts_path.parent / f"{m2ts_path.stem}_subs"

    output_dir = resolve_path(output_dir)

    print(f"extract_pgs_subtitles: Analyzing {m2ts_path}...")

    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_streams",
        str(m2ts_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"extract_pgs_subtitles: ffprobe failed: {result.stderr}")

    probe_data = json.loads(result.stdout)

    pgs_streams = []
    # A missing "streams" key falls through to the "No PGS subtitles found" error below.
    for stream in probe_data.get("streams", []):
        # Streams without a codec name are skipped instead of raising KeyError.
        if stream.get("codec_name") != "hdmv_pgs_subtitle":
            continue

        # When ffprobe reports no stream id, derive one from the stream index.
        stream_id = stream.get("id") or hex(int(stream["index"]) + 1)
        language = get_language_by_trackid(m2ts_path, stream_id)
        disposition = stream.get("disposition") or {}

        pgs_streams.append(
            {
                "track_id": int(stream_id, 16),
                "language": language or "und",
                "default": bool(disposition.get("default")),
                "type": "PGS",
            }
        )

    if not pgs_streams:
        raise RuntimeError("extract_pgs_subtitles: No PGS subtitles found!")

    print(f"Found {len(pgs_streams)} PGS subtitle streams:")
    for stream in pgs_streams:
        default_str = " (Default)" if stream["default"] else ""
        print(f"Track {stream['track_id']}: Language {stream['language']}{default_str}")

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir = resolve_path(temp_dir)
        print(f"extract_pgs_subtitles: Using temporary directory: {temp_dir}")

        # The meta file starts with the MUXOPT line (including --demux) followed by one line per PGS track.
        meta_content = ["MUXOPT --no-pcr-on-video-pid --new-audio-pes --demux\n"]
        for stream in pgs_streams:
            meta_line = f"S_HDMV/PGS, \"{m2ts_path}\", track={stream['track_id']}\n"
            meta_content.append(meta_line)
            print(f"extract_pgs_subtitles: Adding to meta: {meta_line.strip()}")

        meta_file = temp_dir / "meta.txt"
        meta_file.write_text("".join(meta_content))
        print(f"extract_pgs_subtitles: Created meta file at: {meta_file}")

        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"extract_pgs_subtitles: Output directory: {output_dir}")

        print("\nextract_pgs_subtitles: Running tsMuxeR...")
        cmd = ["tsmuxer", str(meta_file), str(temp_dir)]
        print(f"extract_pgs_subtitles: Command: {' '.join(cmd)}")

        # stderr is merged into stdout so a single reader drains everything the
        # child writes; reading only stdout while stderr is a separate pipe can
        # deadlock once the stderr pipe buffer fills up.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        ) as process:
            # Iteration ends at EOF; leaving the `with` block waits for the process to exit.
            for line in process.stdout:  # type: ignore[union-attr]
                print(f"extract_pgs_subtitles: tsMuxeR: {line.strip()}")

        if process.returncode != 0:
            raise RuntimeError(
                f"extract_pgs_subtitles: tsMuxeR failed with return code {process.returncode}"
            )

        print("\nextract_pgs_subtitles: Extracting subtitles...")

        subtitles = []
        for stream in pgs_streams:
            track_num = stream["track_id"]

            # Demuxed files are located by their "track_<id>.sup" suffix in the temporary directory.
            sup_file = next(temp_dir.glob(f"*track_{track_num}.sup"), None)
            if sup_file is None:
                raise RuntimeError(
                    f"extract_pgs_subtitles: Could not find extracted subtitle for track {track_num}"
                )

            # Move out of the temporary directory before it is deleted on exit.
            final_path = output_dir / f"track_{track_num}_{stream['language']}.sup"
            shutil.move(str(sup_file), str(final_path))

            default_str = " (Default)" if stream["default"] else ""
            print(
                f"extract_pgs_subtitles: Extracted subtitle track {track_num} to {final_path}{default_str}"
            )

            subtitles.append(
                {
                    "path": final_path,
                    "language": stream["language"],
                    "default": stream["default"],
                }
            )

    print("\nextract_pgs_subtitles: Extraction completed!")
    return subtitles