The VBAN TEXT/SERVICE Subprotocols
If you're familiar with Voicemeeter then you've probably heard of VBAN. It's a protocol proposed by VB-Audio for transmitting data (audio/video/text/midi) over a network. With it you can do all kinds of fantastic things.
To make full use of remote control over VBAN you need two-way communication, which requires implementing both the TEXT (outgoing) and SERVICE (incoming) subprotocols.
TEXT
TEXT is fairly straightforward: you build a packet composed of a header matching the specification followed by a payload, send it, and the VBAN server should process it.
A barebones example:
import socket
import struct
# fmt: off
# Table of bps values. main() looks up the index of 256000 in this list and
# ORs it with SUBPROTOCOL_TXT to form the byte that follows the b"VBAN" magic.
BPS_OPTS: list[int] = [
0, 110, 150, 300, 600, 1200, 2400, 4800, 9600, 14400, 19200, 31250,
38400, 57600, 115200, 128000, 230400, 250000, 256000, 460800, 921600,
1000000, 1500000, 2000000, 3000000
]
# fmt: on
# Sub-protocol bits for TEXT; combined with the BPS_OPTS index in the first
# header byte after the magic.
SUBPROTOCOL_TXT = 0x40
# Packed as the third single-byte header field.
CHANNEL = 0
# Packed as the fourth single-byte header field. The name suggests it marks
# the payload as UTF-8 (main() encodes the command as UTF-8) - confirm
# against the VBAN specification.
STREAMTYPE_UTF8 = 0x10
def main(
    command: str,
    host: str = "localhost",
    port: int = 6980,
    streamname: str = "Command1",
) -> None:
    """Send a single VBAN TEXT packet carrying *command* to a VBAN server.

    The packet is the packed header immediately followed by the UTF-8
    encoded command.

    Args:
        command: Text to send as the packet payload.
        host: Hostname or IP address of the VBAN server.
        port: UDP port of the VBAN server.
        streamname: Stream name placed in the header; it is UTF-8 encoded
            and NUL-padded to the 16-byte field.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as udp:
        # First byte after the magic: bps table index combined with the
        # TEXT sub-protocol bits.
        rate_and_protocol = BPS_OPTS.index(256000) | SUBPROTOCOL_TXT
        padded_name = streamname.encode("utf-8").ljust(16, b"\0")
        fields = (
            b"VBAN",
            rate_and_protocol,
            0,
            CHANNEL,
            STREAMTYPE_UTF8,
            padded_name,
            0,
        )
        packet = struct.pack("<4s4B16sI", *fields) + command.encode("utf-8")
        udp.sendto(packet, (host, port))
SERVICE
Service is more involved in that you are required to:
- Subscribe to the service to receive the data
- Parse the incoming data packets.
For the first step we can fire a subscription packet matching the protocol specification but we must do this repeatedly on an interval less than the time we subscribe for.
import socket
import struct
import threading
import time
# Packed as the byte right after the b"VBAN" magic in the subscription
# packet; the filtering main() compares `data[4] & 0xE0` against it.
SUBPROTOCOL_SERVICE = 0x60
# Third single-byte header field (byte offset 6) of the outgoing
# subscription packet.
RTPACKETREGISTER = 32
# Value the filtering main() expects at byte offset 6 of incoming packets.
RTPACKET = 33
# Sent in the subscription header (masked to one byte). The subscription
# packet is re-sent every SUBSCRIPTION_TIMEOUT - 1 seconds.
# Presumably the subscription length in seconds - confirm against the spec.
SUBSCRIPTION_TIMEOUT = 5
# Second single-byte header field of the subscription packet (masked to one
# byte).
PACKET_IDENT = 0
def subscribe_to_service(
    sock: socket.socket, host: str, port: int, stop_event: threading.Event
) -> None:
    """Repeatedly send the RT packet subscription request until stopped.

    A subscription packet (header only, no payload) is sent to
    ``(host, port)`` every ``SUBSCRIPTION_TIMEOUT - 1`` seconds so that the
    subscription is renewed before it lapses. Intended to run in its own
    thread.

    Args:
        sock: UDP socket used to send the subscription packets.
        host: Hostname or IP address of the VBAN server.
        port: UDP port of the VBAN server.
        stop_event: Set by the caller to end the loop.
    """
    framecounter = 0
    while not stop_event.is_set():
        header = struct.pack(
            "<4s4B16sI",
            b"VBAN",
            SUBPROTOCOL_SERVICE,
            PACKET_IDENT & 0xFF,
            RTPACKETREGISTER,
            SUBSCRIPTION_TIMEOUT & 0xFF,
            b"Register-RTP".ljust(16, b"\0"),
            framecounter,
        )
        # The counter is packed as an unsigned 32-bit value; wrap it so
        # struct.pack can never raise on a long-running subscription.
        framecounter = (framecounter + 1) & 0xFFFFFFFF
        sock.sendto(header, (host, port))
        # Wait on the event instead of time.sleep() so that setting
        # stop_event ends the thread immediately rather than after the
        # remainder of the interval.
        stop_event.wait(SUBSCRIPTION_TIMEOUT - 1)
def main(
    host: str = "localhost",
    port: int = 6980,
) -> None:
    """Subscribe to the RT packet service and print every datagram received.

    A background thread keeps the subscription alive while this thread
    prints raw datagrams until interrupted with Ctrl+C.

    Args:
        host: Hostname or IP address of the VBAN server.
        port: UDP port of the VBAN server.
    """
    stop_event = threading.Event()
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        # Without a timeout recvfrom() blocks indefinitely, which makes the
        # socket.timeout handler below unreachable and prevents the loop
        # from ever re-checking stop_event.
        sock.settimeout(1.0)
        t = threading.Thread(
            target=subscribe_to_service, args=(sock, host, port, stop_event)
        )
        t.start()
        try:
            while not stop_event.is_set():
                try:
                    data, addr = sock.recvfrom(2048)
                except socket.timeout:
                    continue
                print(f"Received data from {addr}: {data}")
        except KeyboardInterrupt:
            # Ctrl+C is the normal way to end the script.
            pass
        finally:
            # Always stop and join the subscription thread, even when an
            # unexpected error escapes the receive loop, so it is not left
            # running against a closed socket.
            stop_event.set()
            t.join()
What we'll receive in the output is a large dump of data:
Received data from ('localhost', 6980): b'VBAN`\x00!\x00Voicemeeter-RTP\x00\x1f\x9e\t\x00\x03\x00\x00\x04\x02\x02\x01\x03\x00\x00\x00\x00\x80\xbb\x00\x00\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\x83\xfa\x83\xfa\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\x83\xfa\x83\xfa\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\x83\xfa\x83\xfa\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1\xe0\xb1...
This isn't useful to us unless we parse it and convert it to Python types. Here is a simple class that parses bytes 28-43 of an incoming RT Packet:
from dataclasses import dataclass


@dataclass
class VbanRTPacket:
    """Represents bytes 28-43 of an incoming RTPacket.

    The raw byte slices are stored as-is; the properties convert them to
    Python types on access. Build instances with :meth:`from_bytes`.
    """

    # Size of the header fields up to and including the 16-byte stream name:
    # magic (4) + four single-byte fields + stream name (16). The 4-byte
    # frame counter that follows is not included.
    HEADER_SIZE = 4 + 1 + 1 + 1 + 1 + 16
    # from_bytes() slices up to (but excluding) byte offset 44, so anything
    # shorter cannot be parsed.
    MIN_PACKET_SIZE = 44

    _voicemeeterType: bytes  # byte 28
    _reserved: bytes  # byte 29
    _buffersize: bytes  # bytes 30-31
    _voicemeeterVersion: bytes  # bytes 32-35
    _optionBits: bytes  # bytes 36-39
    _samplerate: bytes  # bytes 40-43

    def __str__(self) -> str:
        return ", ".join(
            [
                f"{self.voicemeetertype=}",
                f"{self.voicemeeterversion=}",
                f"{self.samplerate=}",
            ]
        )

    @property
    def voicemeetertype(self) -> str:
        """returns voicemeeter type as a string

        An unrecognised type value yields an empty string (the same value
        used for type 0) instead of raising IndexError.
        """
        kinds = ("", "basic", "banana", "potato")
        index = int.from_bytes(self._voicemeeterType, "little")
        return kinds[index] if index < len(kinds) else ""

    @property
    def voicemeeterversion(self) -> tuple[int, ...]:
        """returns voicemeeter version as a tuple

        The four version bytes are returned in reverse of their order in
        the packet.
        """
        return tuple(reversed(self._voicemeeterVersion[:4]))

    @property
    def samplerate(self) -> int:
        """returns samplerate as an int"""
        return int.from_bytes(self._samplerate, "little")

    @classmethod
    def from_bytes(cls, data: bytes) -> "VbanRTPacket":
        """Returns a dataclass representing the RTPacket data
        from bytes 28-43 of the incoming packet

        Raises:
            ValueError: if *data* is shorter than MIN_PACKET_SIZE bytes, in
                which case the fields would otherwise be silently truncated.
        """
        if len(data) < cls.MIN_PACKET_SIZE:
            raise ValueError(
                f"RTPacket too short: expected at least "
                f"{cls.MIN_PACKET_SIZE} bytes, got {len(data)}"
            )
        return cls(
            _voicemeeterType=data[28:29],
            _reserved=data[29:30],
            _buffersize=data[30:32],
            _voicemeeterVersion=data[32:36],
            _optionBits=data[36:40],
            _samplerate=data[40:44],
        )
However, a VBAN server can throw a lot of different kinds of data to a listening socket so it's important to filter out the data you need. This can be done by adding in some guard clauses:
def main(
    host: str = "localhost",
    port: int = 6980,
) -> None:
    """Subscribe to the RT packet service and print each parsed RT packet.

    A background thread keeps the subscription alive while this thread
    receives datagrams, discards everything that is not an RT packet and
    prints the parsed result. Runs until interrupted with Ctrl+C.

    Args:
        host: Hostname or IP address of the VBAN server.
        port: UDP port of the VBAN server.
    """
    # VbanRTPacket.from_bytes() reads through byte offset 43, so a datagram
    # needs at least 44 bytes; HEADER_SIZE alone (24) is not a sufficient
    # guard.
    min_packet_size = 44
    stop_event = threading.Event()
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        # Without a timeout recvfrom() blocks indefinitely, which makes the
        # socket.timeout handler below unreachable and prevents the loop
        # from ever re-checking stop_event.
        sock.settimeout(1.0)
        t = threading.Thread(
            target=subscribe_to_service, args=(sock, host, port, stop_event)
        )
        t.start()
        try:
            while not stop_event.is_set():
                try:
                    data, _ = sock.recvfrom(2048)
                except socket.timeout:
                    continue
                if len(data) < min_packet_size:
                    continue
                if data[0:4] != b"VBAN":
                    continue
                protocol = data[4] & 0xE0
                if protocol != SUBPROTOCOL_SERVICE:
                    continue
                if data[6] != RTPACKET:
                    continue
                packet = VbanRTPacket.from_bytes(data)
                print(packet)
        except KeyboardInterrupt:
            # Ctrl+C is the normal way to end the script.
            pass
        finally:
            # Always stop and join the subscription thread, even when an
            # unexpected error escapes the receive loop.
            stop_event.set()
            t.join()
The final output of the script is now:
self.voicemeetertype='potato', self.voicemeeterversion=(3, 1, 2, 2), self.samplerate=48000
This demonstrates how we can subscribe to real-time data from the RTPacket service and convert the returned data into usable Python types.
Conclusion
There's a lot more to the specification than that which has been demonstrated in this blog post. You can find a more complete implementation of the TEXT/SERVICE subprotocols in the vban-cmd python package along with a python interface offering an abstraction layer over the dataclasses making scripts like the following possible:
class ManyThings:
    """Small demo wrapper that changes a few strip and bus parameters.

    The wrapped object must expose indexable ``strip`` and ``bus``
    collections whose items accept attribute assignment.
    """

    def __init__(self, vban):
        self.vban = vban

    def things(self):
        """Label and mute the first strip."""
        first_strip = self.vban.strip[0]
        first_strip.label = 'podmic'
        first_strip.mute = True

    def other_things(self):
        """Set bus 3 gain and bus 4 eq, then print the values read back."""
        buses = self.vban.bus
        buses[3].gain = -6.3
        buses[4].eq = True
        lines = [
            f'bus 3 gain has been set to {buses[3].gain}',
            f'bus 4 eq has been set to {buses[4].eq}',
        ]
        print(*lines, sep='\n')
def main():
    """Connect with vban_cmd, run the ManyThings demo, then apply a batch.

    Opens a connection for the 'banana' kind on localhost:6980 using the
    'Command1' stream, runs both ManyThings methods and finally sets
    several parameters in a single apply() call.
    """
    with vban_cmd.api(
        'banana', host='localhost', port=6980, streamname='Command1'
    ) as vban:
        worker = ManyThings(vban)
        worker.things()
        worker.other_things()

        # set many parameters at once
        batch = {
            'strip-2': {'A1': True, 'B1': True, 'gain': -6.0},
            'bus-2': {'mute': True},
            'vban-in-0': {'on': True},
        }
        vban.apply(batch)
Or perhaps even include it in another package altogether:
The possibilities are endless.
Other fantastic projects implementing various VBAN subprotocols:
- vban A pure C implementation of AUDIO/TEXT.
- pyVBAN A python implementation of AUDIO/SERIAL and TEXT.
- obs-vban An OBS plugin implementing AUDIO.
- vbantxt A Go implementation of TEXT offering a single binary
Even more with a quick search.
Subscribe to this blog's RSS feed