The script is copied below. I haven't actually run or tested it, as I don't have any HTML5 video or SRT subtitle tracks to work with, but it should work in theory. Even if it doesn't, it is a reasonable starting point for building a complete system.
It's reasonably well commented and, being untested, should work in principle rather than in practice. I've not added any code to display the tracks dropdown, or to keep that dropdown updated, and there's nothing in there to handle switching subtitle tracks, but that is a UI consideration IMHO. I may come back to this post and add that functionality at a later date. The script also relies on being able to insert HTML elements inside a VIDEO tag and have them displayed. If that doesn't work, it could easily be changed to place an absolutely-positioned DIV over the top of the video, rather than drawing a DIV inside the video tag.
// Subtitle bootstrap: for every <video> that contains <track> children, collect the
// caption/subtitle tracks, fetch their SRT text, create a container for the cue text
// and keep that container in sync with playback via the "timeupdate" event.
//
// Properties added to each processed video element:
//   tracks        - array of { label, kind, src, srclang, element } descriptors
//                   (parseWebSRT later adds "cues" to each descriptor)
//   trackSelected - index into tracks of the track to display, or -1 for none
//   subtitleDiv   - the container element that receives the cue text
//   displaying    - index of the cue currently shown, or -1 for none

/**
 * Tells whether a <track> "kind" attribute value describes displayable timed text.
 * @param {?string} kind Raw attribute value; null when the attribute is absent.
 * @returns {boolean} true for "captions" or "subtitles" (case-insensitive).
 */
function isSubtitleKind(kind) {
  // A <track> without a kind attribute defaults to "subtitles" in the HTML spec
  var normalised = (kind === null || kind === undefined) ? "subtitles" : String(kind).toLowerCase();
  return normalised === "captions" || normalised === "subtitles";
}

/**
 * Starts an asynchronous request for a track's SRT text and hands the track to
 * parseWebSRT once the text has arrived.
 * @param {Object} track Track descriptor; its "src" is the URL that is fetched.
 */
function loadTrackCues(track) {
  var xmlhttp = new XMLHttpRequest();
  // Keep a reference to the track on the request so the handler can find it again
  xmlhttp.track = track;
  xmlhttp.onreadystatechange = function () {
    // Only act on a completed, successful response
    if (this.readyState === 4 && this.status === 200) {
      this.track.webSRTText = this.responseText;
      parseWebSRT(this.track);
    }
  };
  xmlhttp.open("GET", track.src, true);
  // Without send() the request is configured but never actually issued
  xmlhttp.send(null);
}

/**
 * Fills video.tracks with a descriptor for each caption/subtitle <track> child and
 * starts loading the cue text of every descriptor that has a src.
 * @param {Element} video The video element; video.tracks must already be an array.
 */
function collectSubtitleTracks(video) {
  var trackElements = video.getElementsByTagName("track");
  for (var i = 0; i < trackElements.length; i++) {
    var trackElement = trackElements[i];
    if (!isSubtitleKind(trackElement.getAttribute("kind"))) {
      continue;
    }
    var track = {
      "label": trackElement.getAttribute("label"),
      "kind": trackElement.getAttribute("kind"),
      "src": trackElement.getAttribute("src"),
      "srclang": trackElement.getAttribute("srclang"),
      "element": video
    };
    // push() returns the new length, not an index, so hold on to the object itself
    video.tracks.push(track);
    if (track.src) {
      loadTrackCues(track);
    }
  }
}

/**
 * Creates the element that will hold the cue text and attaches it to the video.
 * Sets video.subtitleDiv and resets video.displaying to -1.
 * @param {Element} video The video element to decorate.
 */
function createSubtitleContainer(video) {
  var container = document.createElement("div");
  container.style.position = "relative";
  container.style.bottom = "0px";
  container.style.left = "0px";
  container.style.right = "0px";
  container.className = "cue";
  // NOTE(review): children of a <video> are fallback content, which browsers that
  // support <video> do not render - confirm the container is visible, otherwise
  // position it over the video from outside the element instead.
  video.appendChild(container);
  video.subtitleDiv = container;
  video.displaying = -1;
}

/**
 * Finds the cue that covers a playback position.
 * @param {Array} cues Cue objects with numeric "start" and "end" (seconds).
 * @param {number} time Playback position in seconds.
 * @returns {number} Index of the first cue with start <= time < end, or -1.
 */
function findActiveCue(cues, time) {
  for (var i = 0; i < cues.length; i++) {
    if (time >= cues[i].start && time < cues[i].end) {
      return i;
    }
  }
  return -1;
}

/**
 * Replaces the contents of the subtitle container with the lines of a cue.
 * @param {Element} container The subtitle container.
 * @param {?Object} cue Cue with "lines" and "cuenumber", or null to clear the display.
 */
function renderCue(container, cue) {
  // Remove whatever is currently displayed
  while (container.firstChild) {
    container.removeChild(container.firstChild);
  }
  if (!cue) {
    container.className = "cue";
    return;
  }
  // One paragraph per line; text nodes keep the cue text from being parsed as HTML
  for (var i = 0; i < cue.lines.length; i++) {
    var paragraph = document.createElement("p");
    paragraph.appendChild(document.createTextNode(cue.lines[i]));
    container.appendChild(paragraph);
  }
  container.className = "cue cue" + cue.cuenumber;
}

/**
 * Registers a "timeupdate" listener that keeps video.subtitleDiv showing the cue of
 * the selected track that covers the current playback position.
 * @param {Element} video The video element; expects tracks, trackSelected,
 *   subtitleDiv and displaying to be set up already.
 */
function watchVideoForCues(video) {
  // Track index the displayed cue came from, so a track switch forces a redraw
  var shownTrack = -1;
  video.addEventListener("timeupdate", function () {
    var selected = video.trackSelected;
    // -1 means "no track"; index 0 is a valid selection, so test the range explicitly
    var track = selected >= 0 ? video.tracks[selected] : null;
    // Cues are absent until the asynchronous load and parse has finished
    var cues = (track && track.cues) ? track.cues : [];
    var active = findActiveCue(cues, video.currentTime);
    // timeupdate fires often, so skip the DOM work when nothing has changed
    if (active === video.displaying && (active === -1 || selected === shownTrack)) {
      return;
    }
    renderCue(video.subtitleDiv, active === -1 ? null : cues[active]);
    video.displaying = active;
    shownTrack = selected;
  }, false);
}

/**
 * Sets up subtitle support on every <video> in the document that has at least one
 * <track> child. Only videos present in the document at call time are processed.
 */
function setupVideoSubtitles() {
  var videos = document.getElementsByTagName("video");
  for (var i = 0; i < videos.length; i++) {
    var video = videos[i];
    if (video.getElementsByTagName("track").length === 0) {
      continue;
    }
    video.tracks = [];
    video.trackSelected = -1; // Change this to an index to change the displaying subtitle.
    collectSubtitleTracks(video);
    createSubtitleContainer(video);
    watchVideoForCues(video);
  }
}

// Run straight away when loaded in a page; stay inert where there is no DOM
if (typeof document !== "undefined") {
  setupVideoSubtitles();
}
/**
 * Parses the SRT text stored on a track into cue objects.
 *
 * Reads track.webSRTText and writes track.cues, an array of
 * { cuenumber, start, end, lines } objects where start/end are in seconds (as
 * returned by parseSRTTime) and lines is an array of the cue's text lines.
 * Blocks without a timing line (blank blocks, a header block) and blocks whose
 * times cannot be parsed are skipped rather than aborting the whole parse.
 *
 * @param {Object} track Track descriptor carrying the raw text in webSRTText.
 */
function parseWebSRT(track) {
  // Drop a leading byte-order mark and normalise line endings to \n
  var text = String(track.webSRTText || "")
    .replace(/^\uFEFF/, "")
    .replace(/\r\n|\r/g, "\n");
  // Cues are separated by one or more blank lines
  var aSRTParts = text.split(/\n{2,}/);
  var webSRT = [];
  // Index loop: for...in over an array yields the index strings, not the blocks
  for (var iPart = 0; iPart < aSRTParts.length; iPart++) {
    var aSRTLines = aSRTParts[iPart].replace(/^\n+|\n+$/g, "").split("\n");
    // The timing line is normally the second line (after the cue number), but
    // the number is optional in some files, so accept it on the first line too
    var iTiming = -1;
    for (var iLine = 0; iLine < aSRTLines.length && iLine < 2; iLine++) {
      if (aSRTLines[iLine].indexOf("-->") !== -1) {
        iTiming = iLine;
        break;
      }
    }
    if (iTiming === -1) {
      continue;
    }
    var aTimes = aSRTLines[iTiming].split("-->");
    var sStart = aTimes[0].replace(/^\s+|\s+$/g, "");
    // The end time may be followed by extra settings separated by whitespace; drop them
    var sEnd = aTimes[1].replace(/^\s+|\s+$/g, "").split(/\s+/)[0];
    var iStart = parseSRTTime(sStart);
    var iEnd = parseSRTTime(sEnd);
    if (isNaN(iStart) || isNaN(iEnd)) {
      continue;
    }
    var iNumber = iTiming > 0 ? parseInt(aSRTLines[iTiming - 1], 10) : NaN;
    webSRT.push({
      // Fall back to the 1-based position when the block carries no usable number
      "cuenumber": isNaN(iNumber) ? webSRT.length + 1 : iNumber,
      "start": iStart,
      "end": iEnd,
      // Everything after the timing line is cue text
      "lines": aSRTLines.slice(iTiming + 1)
    });
  }
  // Set the cues variable
  track.cues = webSRT;
}
/**
 * Converts an SRT timestamp into a number of seconds.
 *
 * Accepts "HH:MM:SS,mmm", and also shorter forms such as "MM:SS,mmm" or "SS,mmm";
 * the fraction separator may be a comma or a full stop.
 *
 * @param {string} sTime The timestamp text.
 * @returns {number} Total seconds, or NaN when a part is not numeric.
 */
function parseSRTTime(sTime) {
  // Split the time into parts, separated by colons
  var aTimeParts = String(sTime).split(":");
  // The last part is the seconds, a comma, then the milliseconds; swapping the
  // comma for a full stop lets it be parsed as a float
  var iSecs = parseFloat(aTimeParts[aTimeParts.length - 1].replace(",", "."));
  var iMultiplier = 60;
  // Walk the remaining parts BACKWARDS, down to and including index 0, so the
  // leading (hours) part is counted as well
  for (var i = aTimeParts.length - 2; i >= 0; i--) {
    // Explicit radix so parts with a leading zero ("08") are never read as octal
    iSecs += parseInt(aTimeParts[i], 10) * iMultiplier;
    // Each step to the left is the next larger unit: minutes (60), then hours (3600)
    iMultiplier = iMultiplier * 60;
  }
  // Return the total number of seconds
  return iSecs;
}