Initial Commit

2025-09-22 20:19:35 +02:00
commit 3bde24308a
3 changed files with 197 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,35 @@
 # Toniebox Chapter Extractor
 This script extracts chapter information from a Toniebox file, converts it to audio chapters, and splits the audio into separate files.
 ## Prerequisites
 Ensure you have Python 3 installed on your system.
 ## Installation
 1. Clone this repository or download the script files.
 2. Navigate to the project directory.
 3. If you haven't already, install ffmpeg using your favorite package manager.
 4. Install the required dependencies using `pip`:
    ```bash
    pip install -r requirements.txt
    ```
 ## Usage
 To run the script, use the following command:
 ```bash
 python Taf2Ogg.py <tonie_file> <output_directory>
 #   <tonie_file>: The path to the Toniebox file you want to process.
 #   <output_directory>: The directory where the extracted chapters will be saved.
 # Example:
 python Taf2Ogg.py CONTENT/8D77321C/500304E0 ./output_chapters
 ```
 ## Notes
    The script assumes the sample rate for the Toniebox Opus audio is 48000 Hz.
    If the Toniebox has not cached the entire file, extraction of all chapters may not be possible.
--- a/Taf2Ogg.py
+++ b/Taf2Ogg.py
@@ -0,0 +1,160 @@
 #!/bin/python3
 import os
 import re
 import struct
 import sys
 import tempfile

 import ffmpeg
 from google.protobuf import descriptor_pb2, descriptor_pool, message_factory
 # Protobuf schema of the Tonie header, assembled in descriptor form
 _FieldProto = descriptor_pb2.FieldDescriptorProto


 def _add_field(message_proto, name, number, label, field_type):
    """Append a field with the given attributes to *message_proto* and return it."""
    new_field = message_proto.field.add()
    new_field.name = name
    new_field.number = number
    new_field.label = label
    new_field.type = field_type
    return new_field


 file_descriptor_proto = descriptor_pb2.FileDescriptorProto(
    name='tonie_header.proto',
    package='tonie',
    syntax='proto3',
 )
 # The single message type declared by the schema
 message_descriptor_proto = file_descriptor_proto.message_type.add(name='TonieHeader')
 # TonieHeader fields, declared in field-number order
 field_data_hash = _add_field(
    message_descriptor_proto, 'dataHash', 1,
    _FieldProto.LABEL_OPTIONAL, _FieldProto.TYPE_BYTES,
 )
 field_data_length = _add_field(
    message_descriptor_proto, 'dataLength', 2,
    _FieldProto.LABEL_OPTIONAL, _FieldProto.TYPE_UINT32,
 )
 field_timestamp = _add_field(
    message_descriptor_proto, 'timestamp', 3,
    _FieldProto.LABEL_OPTIONAL, _FieldProto.TYPE_UINT32,
 )
 field_chapter_pages = _add_field(
    message_descriptor_proto, 'chapterPages', 4,
    _FieldProto.LABEL_REPEATED, _FieldProto.TYPE_UINT32,
 )
 # chapterPages is the only repeated field; it is explicitly marked as packed
 field_chapter_pages.options.packed = True
 field_padding = _add_field(
    message_descriptor_proto, 'padding', 5,
    _FieldProto.LABEL_OPTIONAL, _FieldProto.TYPE_BYTES,
 )
 # Register the file descriptor in a dedicated descriptor pool
 pool = descriptor_pool.DescriptorPool()
 file_descriptor = pool.Add(file_descriptor_proto)
 # Look the message up by its fully qualified name and build its message class
 message_descriptor = pool.FindMessageTypeByName('tonie.TonieHeader')
 TonieHeader = message_factory.GetMessageClass(message_descriptor)
 def ExtractChapters(filename):
    """Return the chapter page numbers stored in the header of a Tonie file.

    The first 4096 bytes of the file hold the header: a 4-byte big-endian
    length prefix followed by the protobuf-encoded ``TonieHeader``.  Only the
    number of bytes announced by the prefix is handed to the protobuf parser,
    so bytes after the encoded message are never interpreted as part of it.

    Args:
        filename: Path of the Tonie file to read.

    Returns:
        The ``chapterPages`` values of the header as a list of ints.

    Raises:
        OSError: If the file cannot be opened or read.
        google.protobuf.message.DecodeError: If the header cannot be parsed.
    """
    with open(filename, 'rb') as f:
        # Read the first 4096 bytes which includes the header
        header_data = f.read(0x1000)
    encoded = header_data[4:]
    if len(header_data) >= 4:
        (declared_length,) = struct.unpack('>L', header_data[:4])
        # Trust the prefix only when it is plausible; otherwise keep the
        # previous behaviour of parsing everything after the prefix.
        if 0 < declared_length <= len(encoded):
            encoded = encoded[:declared_length]
    # Decode the header
    header = TonieHeader()
    header.ParseFromString(encoded)
    return list(header.chapterPages)
 _BLOCK_SIZE = 4096  # the file is read, copied and scanned in chunks of this size
 _OPUS_SAMPLE_RATE = 48000  # sample rate for opus
 # Leading fields of an Ogg page header (little-endian): capture pattern,
 # version, header type, 8-byte granule position, serial number and
 # 4-byte page sequence number.
 _OGG_PAGE_HEADER = struct.Struct('<4sBBQII')


 def _format_timestamp(granule_pos, sample_rate=_OPUS_SAMPLE_RATE):
    """Convert a granule position (a sample count) to ``HH:MM:SS.mmm``."""
    # Integer arithmetic avoids float rounding producing values like "60.000".
    total_ms = (granule_pos * 1000 + sample_rate // 2) // sample_rate
    hours, remainder_ms = divmod(total_ms, 3600 * 1000)
    minutes, remainder_ms = divmod(remainder_ms, 60 * 1000)
    seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02}.{millis:03}"


 def _copy_and_scan(src, dst, chapter_pages):
    """Copy the audio blocks from *src* to *dst* and collect chapter timestamps."""
    timestamps = []
    found = 0
    total = len(chapter_pages)
    print("Scanning file for Chapters...")
    # The first block is the Tonie header, not audio: neither copied nor scanned.
    src.read(_BLOCK_SIZE)
    while True:
        block = src.read(_BLOCK_SIZE)
        if not block:
            break
        dst.write(block)
        if found >= total:
            # Only reachable with an empty chapter list: keep copying.
            continue
        # Only the first "OggS" marker of each block is inspected.
        index = block.find(b"OggS")
        if index == -1 or index + _OGG_PAGE_HEADER.size > len(block):
            # No page start here, or the header is cut off by the block end.
            continue
        _, _, _, granule_pos, _, seq_num = _OGG_PAGE_HEADER.unpack_from(block, index)
        if seq_num != chapter_pages[found]:
            continue
        print(f"Found {found} of {total-1}", end="\r")
        timestamps.append(_format_timestamp(granule_pos))
        found += 1
        if found >= total:
            print(f"Found {found-1} of {total-1}")
            # NOTE(review): copying stops here, so audio after the block that
            # starts the last listed chapter is not written to the output.
            # Confirm this is intended.
            break
    if found < total:
        print("Your Toniebox has not Cached the entire file. \nExtraction of all Chapters may not be possible.")
    print("Scanning Complete.")
    return timestamps


 def read_and_save_binary_file_bytes(input_filename, output_filename, ChapterList):
    """Copy the audio part of a Tonie file and locate the listed chapter pages.

    Everything after the first 4096-byte block of ``input_filename`` is
    written to ``output_filename``.  While copying, each block is searched for
    an Ogg page start; when the page sequence number equals the next entry of
    ``ChapterList``, the page's granule position is converted to a timestamp.

    Args:
        input_filename: Path of the Tonie file to read.
        output_filename: Path of the file that receives the copied audio data.
        ChapterList: Page sequence numbers to look for, in ascending file order.

    Returns:
        A list of ``HH:MM:SS.mmm`` strings, one per chapter page found, in the
        order found.  The list is shorter than ``ChapterList`` when the file
        ends early, and empty when an error occurred (the error is printed).
    """
    try:
        with open(input_filename, 'rb') as src, open(output_filename, 'wb') as dst:
            return _copy_and_scan(src, dst, ChapterList)
    except FileNotFoundError as e:
        # e.filename is whichever of the two paths could not be opened.
        print(f"File {e.filename} not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Always hand a list back so callers can take len() of the result.
    return []
 def split_audio(input_file, chapter_list, output_dir):
    """Cut ``input_file`` into one Ogg file per pair of consecutive timestamps.

    For every neighbouring pair ``(start, end)`` in ``chapter_list`` the range
    is copied without re-encoding to ``Chapter_<n>.ogg`` inside ``output_dir``
    (``n`` starts at 1).  A failure for one chapter is printed and the
    remaining chapters are still processed.

    Args:
        input_file: Path of the audio file to split.
        chapter_list: Timestamps accepted by ffmpeg's ``ss``/``to`` options.
            ``None`` or fewer than two entries means nothing is exported.
        output_dir: Directory for the chapter files; created when missing.

    Returns:
        None.
    """
    print("Extracting files...")
    try:
        # exist_ok avoids the check-then-create race and also reports the case
        # where the path exists but is not a directory.
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Failed to create output directory: {e}")
        return
    # A failed scan may hand over None; treat it like an empty list.
    timestamps = list(chapter_list or [])
    last_index = len(timestamps) - 1
    for i, (start_time, end_time) in enumerate(zip(timestamps, timestamps[1:]), start=1):
        print(f"Exporting file {i} of {last_index}", end="\r")
        output_file = os.path.join(output_dir, f"Chapter_{i}.ogg")
        try:
            # NOTE(review): no overwrite flag is passed to ffmpeg; behaviour
            # when the chapter file already exists should be confirmed.
            (
            ffmpeg
            .input(input_file, ss=start_time, to=end_time, v=8)
            .output(output_file, codec='copy')
            .run()
            )
        except ffmpeg.Error as e:
            print(f"Error exporting file {output_file}: {e}")
    print("\ndone")
 def main(filename, output_dir):
    """Extract every chapter of the Tonie file ``filename`` into ``output_dir``.

    The audio part of the file is first copied to an intermediate ``.ogg``
    file, which is then split at the chapter timestamps.  The intermediate
    file lives in a private temporary directory that is removed afterwards,
    including when one of the steps fails.

    Args:
        filename: Path of the Tonie file to process.
        output_dir: Directory that receives the chapter files.

    Returns:
        None.  Any error is printed rather than raised.
    """
    try:
        chapter_pages = ExtractChapters(filename)
        # A fresh private directory replaces the fixed, predictable /tmp path
        # and guarantees cleanup on every exit path.
        with tempfile.TemporaryDirectory() as work_dir:
            output_filename = os.path.join(work_dir, os.path.basename(filename) + ".ogg")
            TimeList = read_and_save_binary_file_bytes(filename, output_filename, chapter_pages)
            split_audio(output_filename, TimeList, output_dir)
    except Exception as e:
        # Top-level boundary of the script: report the problem and stop.
        print(f"Error: {e}")
 if __name__ == "__main__":
    # Exactly two arguments are expected: the Tonie file and the output directory.
    if len(sys.argv) != 3:
        # Show the name the script was actually started with, on stderr.
        print(
            f"Usage: python {os.path.basename(sys.argv[0])} <tonie_file> <output_directory>",
            file=sys.stderr,
        )
        sys.exit(1)
    filename = sys.argv[1]
    output_dir = sys.argv[2]
    main(filename, output_dir)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
 ffmpeg-python
 protobuf