invidious/src/invidious/videos/description.cr

77 lines
2.1 KiB
Crystal

require "json"
require "uri"
# Copies text from *iter* (an iterator over codepoints) into *str*,
# replacing the five HTML-special characters (& ' " < >) with their
# entity form.
#
# *count* is a budget expressed in UTF-16 code units: a codepoint above
# 0xFFFF (outside the BMP) consumes two units, anything else one. Copying
# stops once the budget is reached or the iterator is exhausted.
#
# Returns the number of UTF-16 units actually consumed. This can exceed
# *count* by one when the last codepoint copied lies above 0xFFFF.
private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int
  copied = 0

  while copied < count
    cp = iter.next
    break if cp.is_a?(Iterator::Stop)

    case cp
    when 0x26 then str << "&amp;"  # Ampersand (&)
    when 0x27 then str << "&#39;"  # Single quote (')
    when 0x22 then str << "&quot;" # Double quote (")
    when 0x3C then str << "&lt;"   # Less-than (<)
    when 0x3E then str << "&gt;"   # Greater-than (>)
    else           str << cp.chr
    end

    # A codepoint from the SMP takes a surrogate pair, so it counts twice.
    copied += cp > 0xFFFF ? 2 : 1
  end

  copied
end
# Builds the escaped text of a video description, replacing each entry of
# "commandRuns" with whatever `parse_link_endpoint` returns for it
# (presumably the HTML markup of the link; confirm against that helper).
#
# *desc* is expected to respond to `["content"]` (a string) and optionally
# `["commandRuns"]` (an array of objects carrying "startIndex", "length"
# and, optionally, "onTap" -> "innertubeCommand"). *video_id* is only
# forwarded to `parse_link_endpoint`.
#
# Returns "" when *desc* is nil or its content is empty. Every piece of
# plain text that reaches the result goes through `copy_string`, i.e. it is
# HTML-escaped, whether or not the description contains commands.
def parse_description(desc, video_id : String) : String?
  return "" if desc.nil?

  content = desc["content"].as_s
  return "" if content.empty?

  commands = desc["commandRuns"]?.try &.as_a

  if commands.nil?
    # No command to expand, but the text must still be escaped exactly like
    # on the path below; returning `content` untouched would let raw markup
    # through. `bytesize` is a safe budget, see the note on the final copy.
    return String.build do |str|
      copy_string(str, content.each_codepoint, content.bytesize)
    end
  end

  # Not everything is stored in UTF-8 on youtube's side. The SMP codepoints
  # (0x10000 and above) are encoded as UTF-16 surrogate pairs, which are
  # automatically decoded by the JSON parser. The command offsets are thus
  # expressed in UTF-16 code units, so copied text has to be counted in
  # those units too, which prevents the use of a regular string copy.
  iter = content.each_codepoint

  # Position reached in `content`, in UTF-16 code units.
  index = 0

  return String.build do |str|
    commands.each do |command|
      cmd_start = command["startIndex"].as_i
      cmd_length = command["length"].as_i

      # Copy the text chunk between this command and the previous if needed.
      # A zero or negative gap makes `copy_string` copy nothing.
      length = cmd_start - index
      index += copy_string(str, iter, length)

      # We need to copy the command's text using the iterator
      # and the special function defined above.
      cmd_content = String.build(cmd_length) do |str2|
        copy_string(str2, iter, cmd_length)
      end

      link = cmd_content
      if on_tap = command.dig?("onTap", "innertubeCommand")
        link = parse_link_endpoint(on_tap, cmd_content, video_id)
      end

      str << link
      index += cmd_length
    end

    # Copy the end of the string (past the last command) by draining the
    # iterator. `content.size` must not be used to compute what is left: it
    # counts codepoints whereas `index` counts UTF-16 units, so every SMP
    # character would cut one unit off the tail. `bytesize` is always at
    # least the UTF-16 length (1 to 3 UTF-8 bytes map to one unit, 4 bytes
    # map to two), and `copy_string` stops by itself when the iterator ends.
    copy_string(str, iter, content.bytesize)
  end
end