Skip to content

Commit fa11039

Browse files
committed
Add streaming manifest detection (HLS, DASH, ISM)
Support detecting M3U (HLS), MPD (MPEG-DASH), and ISM (Microsoft Smooth Streaming) manifest file types. HLS detection requires HLS-specific `#EXT-X-` tags to avoid matching generic M3U playlists. DASH and ISM are identified by their XML root elements. Closes #1
1 parent 575015c commit fa11039

File tree

9 files changed

+214
-0
lines changed

9 files changed

+214
-0
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ returns the first component of the detected MIME type
200200
- [`icns`](https://en.wikipedia.org/wiki/Apple_Icon_Image_format)
201201
- [`ico`](<https://en.wikipedia.org/wiki/ICO_(file_format)>)
202202
- [`ics`](https://en.wikipedia.org/wiki/ICalendar#Data_format) - iCalendar
203+
- [`ism`](https://learn.microsoft.com/en-us/previous-versions/iis/smooth-streaming-client/iis-smooth-streaming-client-manifest-smoothstreamingmedia-element) - Microsoft Smooth Streaming manifest
203204
- [`indd`](https://en.wikipedia.org/wiki/Adobe_InDesign#File_format)
204205
- [`it`](https://wiki.openmpt.org/Manual:_Module_formats#The_Impulse_Tracker_format_.28.it.29) - Audio module format: Impulse Tracker
205206
- [`j2c`](https://en.wikipedia.org/wiki/JPEG_2000) - JPEG 2000 codestream
@@ -217,6 +218,7 @@ returns the first component of the detected MIME type
217218
- [`lz`](https://en.wikipedia.org/wiki/Lzip)
218219
- [`lz4`](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm))
219220
- [`lzh`](<https://en.wikipedia.org/wiki/LHA_(file_format)>) - LZH archive
221+
- [`m3u`](https://en.wikipedia.org/wiki/M3U) - HLS playlist when HLS-specific `#EXT-X-` tags are present
220222
- [`m4a`](https://en.wikipedia.org/wiki/M4A) - Audio-only MPEG-4 files
221223
- [`m4b`](https://en.wikipedia.org/wiki/M4B) - Audiobook and podcast MPEG-4 files, which also contain metadata including chapter markers, images, and hyperlinks
222224
- [`m4p`](https://en.wikipedia.org/wiki/MPEG-4_Part_14#Filename_extensions) - MPEG-4 files with audio streams encrypted by FairPlay Digital Rights Management as were sold through the iTunes Store
@@ -232,6 +234,7 @@ returns the first component of the detected MIME type
232234
- [`mp2`](https://en.wikipedia.org/wiki/MPEG-1_Audio_Layer_II)
233235
- [`mp3`](https://en.wikipedia.org/wiki/MP3)
234236
- [`mp4`](https://en.wikipedia.org/wiki/MPEG-4_Part_14#Filename_extensions)
237+
- [`mpd`](https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP) - MPEG-DASH manifest
235238
- [`mpc`](https://en.wikipedia.org/wiki/Musepack) - Musepack (SV7 & SV8)
236239
- [`mpg`](https://en.wikipedia.org/wiki/MPEG-1)
237240
- [`mts`](https://en.wikipedia.org/wiki/.m2ts) - MPEG-2 Transport Stream, both raw and Blu-ray Disc Audio-Video (BDAV) versions

Sources/FileType/FileType.swift

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,12 @@ extension FileType {
319319
mime: "video/mp4"
320320
),
321321

322+
.mpd: FileType(
323+
type: .mpd,
324+
ext: "mpd",
325+
mime: "video/vnd.mpeg.dash.mpd"
326+
),
327+
322328
.mid: FileType(
323329
type: .mid,
324330
ext: "mid",
@@ -529,6 +535,12 @@ extension FileType {
529535
mime: "text/calendar"
530536
),
531537

538+
.ism: FileType(
539+
type: .ism,
540+
ext: "ism",
541+
mime: "application/vnd.ms-sstr+xml"
542+
),
543+
532544
.vcf: FileType(
533545
type: .vcf,
534546
ext: "vcf",
@@ -818,6 +830,12 @@ extension FileType {
818830
mime: "application/x-lzh-compressed"
819831
),
820832

833+
.m3u: FileType(
834+
type: .m3u,
835+
ext: "m3u",
836+
mime: "application/vnd.apple.mpegurl"
837+
),
838+
821839
.skp: FileType(
822840
type: .skp,
823841
ext: "skp",

Sources/FileType/FileTypeExtension.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public enum FileTypeExtension: CaseIterable, Sendable {
6363
case icns
6464
case ico
6565
case ics
66+
case ism
6667
case indd
6768
case it
6869
case j2c
@@ -80,6 +81,7 @@ public enum FileTypeExtension: CaseIterable, Sendable {
8081
case lz
8182
case lz4
8283
case lzh
84+
case m3u
8385
case m4a
8486
case m4b
8587
case m4p
@@ -95,6 +97,7 @@ public enum FileTypeExtension: CaseIterable, Sendable {
9597
case mp2
9698
case mp3
9799
case mp4
100+
case mpd
98101
case mpc
99102
case mpg
100103
case mts

Sources/FileType/FileTypeMatch.swift

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ enum FileTypeMatchType: Sendable {
6868
case ico
6969
case ics
7070
case icns
71+
case ism
7172
case indd
7273
case it
7374
case j2c
@@ -85,6 +86,7 @@ enum FileTypeMatchType: Sendable {
8586
case lz
8687
case lz4
8788
case lzh
89+
case m3u
8890
case m4a
8991
case m4b
9092
case m4p
@@ -100,6 +102,7 @@ enum FileTypeMatchType: Sendable {
100102
case mp2
101103
case mp3
102104
case mp4
105+
case mpd
103106
case mpc
104107
case mpg
105108
case mp1s
@@ -1405,6 +1408,24 @@ struct FileTypeMatch: Sendable {
14051408
match: matchRegistryEditorV4
14061409
),
14071410

1411+
FileTypeMatch(
1412+
type: .m3u,
1413+
bytesCount: 7,
1414+
match: matchM3U
1415+
),
1416+
1417+
FileTypeMatch(
1418+
type: .mpd,
1419+
bytesCount: 4,
1420+
match: matchMPD
1421+
),
1422+
1423+
FileTypeMatch(
1424+
type: .ism,
1425+
bytesCount: 4,
1426+
match: matchISM
1427+
),
1428+
14081429
FileTypeMatch(
14091430
type: .vtt,
14101431
bytesCount: 6,

Sources/FileType/streaming.swift

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import Foundation
2+
3+
/// Text encodings attempted, in this order, when turning manifest bytes into a string.
private let manifestTextEncodings: [String.Encoding] = [.utf8, .utf16LittleEndian, .utf16BigEndian]
8+
9+
/// Characters that may surround manifest content and are ignored by the matchers:
/// whitespace, newlines, and U+FEFF (the byte-order mark once decoded).
private let manifestLeadingCharacters: CharacterSet = {
    var characters = CharacterSet.whitespacesAndNewlines
    characters.insert("\u{FEFF}")
    return characters
}()
11+
12+
/// Reports whether `data` looks like an HLS playlist.
///
/// The bytes are decoded to text, surrounding whitespace/BOM characters are
/// trimmed, and the result is uppercased, so the comparison is
/// case-insensitive.
///
/// - Parameter data: Raw bytes of the candidate file.
/// - Returns: `true` when the text starts with `#EXTM3U` and also contains an
///   `#EXT-X-` tag; `false` otherwise, including when the bytes cannot be
///   decoded.
func matchM3U(_ data: Data) -> Bool {
    guard let decoded = manifestText(from: data) else {
        return false
    }

    let normalized = decoded
        .trimmingCharacters(in: manifestLeadingCharacters)
        .uppercased()

    // The `#EXT-X-` requirement keeps generic (non-HLS) M3U playlists from matching.
    return normalized.hasPrefix("#EXTM3U") && normalized.contains("#EXT-X-")
}
25+
26+
/// Reports whether `data` is an XML document whose first element is `MPD`
/// (an MPEG-DASH manifest).
///
/// - Parameter data: Raw bytes of the candidate file.
/// - Returns: `true` when the first start tag's name, ignoring any namespace
///   prefix, is exactly `MPD`.
func matchMPD(_ data: Data) -> Bool {
    return matchXMLManifest(data, rootElement: "MPD")
}
29+
30+
/// Reports whether `data` is an XML document whose first element is
/// `SmoothStreamingMedia` (a Microsoft Smooth Streaming manifest).
///
/// - Parameter data: Raw bytes of the candidate file.
/// - Returns: `true` when the first start tag's name, ignoring any namespace
///   prefix, is exactly `SmoothStreamingMedia`.
func matchISM(_ data: Data) -> Bool {
    return matchXMLManifest(data, rootElement: "SmoothStreamingMedia")
}
33+
34+
/// Checks whether the first XML start tag in `data` is named `rootElement`.
///
/// - Parameters:
///   - data: Raw bytes of the candidate document.
///   - rootElement: Expected element name; compared case-sensitively against
///     the tag name with any namespace prefix removed.
/// - Returns: `false` when the bytes cannot be decoded or no start tag is
///   found; otherwise whether the names are equal.
private func matchXMLManifest(_ data: Data, rootElement: String) -> Bool {
    guard let decoded = manifestText(from: data) else {
        return false
    }

    // A missing start tag yields `nil`, which never equals `rootElement`.
    return firstXMLStartTag(in: decoded) == rootElement
}
44+
45+
/// Decodes `data` as manifest text.
///
/// The encoding is first guessed from the leading bytes (see
/// `sniffedManifestEncoding(in:)`). Blindly trying UTF-8 first is not enough:
/// ASCII-only UTF-16 bytes are also valid UTF-8 (they decode to text with
/// interleaved NUL characters), and a UTF-16LE decode can accept byte-swapped
/// UTF-16BE input, so later candidates would never get a chance.
///
/// When sniffing is inconclusive, or the sniffed encoding fails to decode,
/// the encodings in `manifestTextEncodings` are tried in order.
///
/// - Parameter data: Raw bytes of the candidate file.
/// - Returns: The decoded text, or `nil` when no candidate encoding decodes
///   the data. A leading U+FEFF may still be present in the result; callers
///   are expected to skip it.
private func manifestText(from data: Data) -> String? {
    if let sniffed = sniffedManifestEncoding(in: data),
        let text = String(data: data, encoding: sniffed)
    {
        return text
    }

    for encoding in manifestTextEncodings {
        if let text = String(data: data, encoding: encoding) {
            return text
        }
    }

    return nil
}

/// Guesses the text encoding of `data` from its first bytes.
///
/// Recognizes the UTF-8 and UTF-16 byte-order marks. Without a BOM, a first
/// ASCII character stored as two bytes with one of them zero is treated as
/// UTF-16 of the matching endianness (manifests start with ASCII such as
/// `<`, `#`, or whitespace).
///
/// - Returns: The guessed encoding, or `nil` when the prefix is inconclusive.
private func sniffedManifestEncoding(in data: Data) -> String.Encoding? {
    let head = [UInt8](data.prefix(3))

    if head.starts(with: [0xEF, 0xBB, 0xBF]) {
        return .utf8
    }

    guard head.count >= 2 else {
        return nil
    }

    switch (head[0], head[1]) {
    case (0xFF, 0xFE):
        return .utf16LittleEndian
    case (0xFE, 0xFF):
        return .utf16BigEndian
    case (0x01...0x7F, 0x00):
        // BOM-less UTF-16LE: ASCII code unit, low byte first.
        return .utf16LittleEndian
    case (0x00, 0x01...0x7F):
        // BOM-less UTF-16BE: ASCII code unit, high byte first.
        return .utf16BigEndian
    default:
        return nil
    }
}
54+
55+
/// Returns the name of the first element start tag in `text`, without any
/// namespace prefix.
///
/// Leading whitespace/BOM characters, processing instructions (`<?…?>`),
/// comments (`<!--…-->`) and other markup declarations (`<!…>`, for example
/// `<!DOCTYPE …>`) are skipped before the tag is read. A prefixed name such
/// as `dash:MPD` yields `MPD`.
///
/// - Parameter text: Decoded document text.
/// - Returns: The tag name, or `nil` when the next significant character is
///   not `<`, a skipped construct is unterminated, or the tag name is empty.
private func firstXMLStartTag(in text: String) -> String? {
    var remaining = text[...]

    while true {
        remaining = remaining.drop(while: isManifestLeadingCharacter)

        guard remaining.first == "<" else {
            return nil
        }

        // XML declaration or another processing instruction.
        if remaining.hasPrefix("<?") {
            guard let terminator = remaining.range(of: "?>") else {
                return nil
            }

            remaining = remaining[terminator.upperBound...]
            continue
        }

        // Must be tested before the generic "<!" case below.
        if remaining.hasPrefix("<!--") {
            guard let terminator = remaining.range(of: "-->") else {
                return nil
            }

            remaining = remaining[terminator.upperBound...]
            continue
        }

        // Markup declaration such as <!DOCTYPE …>. Its end is not simply the
        // first ">": an internal subset ("[ … ]") and quoted literals may
        // contain ">" themselves.
        if remaining.hasPrefix("<!") {
            guard let terminator = markupDeclarationEnd(in: remaining) else {
                return nil
            }

            remaining = remaining[remaining.index(after: terminator)...]
            continue
        }

        // The name runs from just after "<" up to whitespace, ">" or "/".
        let rawTagName =
            remaining
            .dropFirst()
            .prefix { character in
                !character.isWhitespace && character != ">" && character != "/"
            }

        guard !rawTagName.isEmpty else {
            return nil
        }

        // Keep only the part after the last ":" so a namespace prefix is ignored.
        return String(rawTagName.split(separator: ":").last ?? rawTagName[...])
    }
}

/// Finds the `>` that closes the markup declaration at the start of
/// `declaration` (which is expected to begin with `<!`).
///
/// A `>` is ignored while inside a quoted literal or inside a bracketed
/// internal subset. Comments and processing instructions nested in the
/// declaration are skipped whole so that quotes or `>` inside them do not
/// confuse the scan.
///
/// - Returns: The index of the closing `>`, or `nil` when the declaration
///   (or a nested comment / processing instruction) is unterminated.
private func markupDeclarationEnd(in declaration: Substring) -> Substring.Index? {
    var bracketDepth = 0
    var openQuote: Character?
    var index = declaration.startIndex

    while index < declaration.endIndex {
        let character = declaration[index]

        if let quote = openQuote {
            // Inside a literal only the matching quote is significant.
            if character == quote {
                openQuote = nil
            }
        } else {
            switch character {
            case "<":
                let rest = declaration[index...]
                let closer: String?
                if rest.hasPrefix("<!--") {
                    closer = "-->"
                } else if rest.hasPrefix("<?") {
                    closer = "?>"
                } else {
                    closer = nil
                }

                if let closer = closer {
                    guard let end = rest.range(of: closer) else {
                        return nil
                    }

                    index = end.upperBound
                    continue
                }
            case "\"", "'":
                openQuote = character
            case "[":
                bracketDepth += 1
            case "]":
                bracketDepth = max(0, bracketDepth - 1)
            case ">" where bracketDepth == 0:
                return index
            default:
                break
            }
        }

        index = declaration.index(after: index)
    }

    return nil
}
106+
107+
/// Reports whether every Unicode scalar of `character` belongs to
/// `manifestLeadingCharacters`, i.e. whether the character may be skipped
/// ahead of manifest content.
private func isManifestLeadingCharacter(_ character: Character) -> Bool {
    let hasForeignScalar = character.unicodeScalars.contains { scalar in
        !manifestLeadingCharacters.contains(scalar)
    }
    return !hasForeignScalar
}

Tests/FileTypeTests/FileTypeTests.swift

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,56 @@ struct FileTypeTests {
325325
#expect(try FileType.detect(using: fileHandle, matching: .video)?.type == .mp4)
326326
}
327327

328+
@Test("detects streaming manifests without falling back to generic xml")
func detectsStreamingManifests() {
    // MPEG-DASH: XML declaration followed by an `MPD` root element.
    let dashSource = """
        <?xml version="1.0" encoding="UTF-8"?>
        <MPD xmlns="urn:mpeg:dash:schema:mpd:2011" type="static"></MPD>
        """
    // Smooth Streaming: XML declaration followed by a `SmoothStreamingMedia` root element.
    let smoothSource = """
        <?xml version="1.0" encoding="utf-8"?>
        <SmoothStreamingMedia MajorVersion="2" MinorVersion="1"></SmoothStreamingMedia>
        """
    // HLS: an M3U header plus HLS-specific `#EXT-X-` tags.
    let hlsSource = """
        #EXTM3U
        #EXT-X-VERSION:3
        #EXT-X-TARGETDURATION:6
        #EXTINF:6.000,
        segment0.ts
        #EXT-X-ENDLIST
        """

    let detectedDASH = FileType.detect(in: Data(dashSource.utf8))
    let detectedSmooth = FileType.detect(in: Data(smoothSource.utf8))
    let detectedHLS = FileType.detect(in: Data(hlsSource.utf8))

    #expect(detectedDASH?.type == .mpd)
    #expect(detectedSmooth?.type == .ism)
    #expect(detectedHLS?.type == .m3u)
}
357+
358+
@Test("does not treat generic xml or m3u as streaming manifests")
func avoidsStreamingManifestFalsePositives() {
    // XML whose root element is neither `MPD` nor `SmoothStreamingMedia`.
    let plainXMLSource = """
        <?xml version="1.0" encoding="UTF-8"?>
        <root>Hello</root>
        """
    // Extended M3U playlist without any HLS-specific `#EXT-X-` tag.
    let plainM3USource = """
        #EXTM3U
        #EXTINF:123,Example Artist - Example Song
        example.mp3
        """

    let detectedXML = FileType.detect(in: Data(plainXMLSource.utf8))
    let detectedM3U = FileType.detect(in: Data(plainM3USource.utf8))

    #expect(detectedXML?.type == .xml)
    #expect(detectedM3U == nil)
}
377+
328378
@Test("returns nil for truncated matroska header")
329379
func returnsNilForTruncatedMatroskaHeader() {
330380
let data = Data([0x1A, 0x45, 0xDF, 0xA3])

Tests/Fixtures/fixture.ism

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<SmoothStreamingMedia MajorVersion="2" MinorVersion="1" Duration="60000000"></SmoothStreamingMedia>

Tests/Fixtures/fixture.m3u

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#EXTM3U
2+
#EXT-X-VERSION:3
3+
#EXT-X-TARGETDURATION:6
4+
#EXTINF:6.000,
5+
segment0.ts
6+
#EXT-X-ENDLIST

Tests/Fixtures/fixture.mpd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" type="static"></MPD>

0 commit comments

Comments
 (0)