invidious/src/invidious/user/imports.cr

require "csv"

struct Invidious::User
  module Import
    extend self

    # Parse a youtube CSV subscription file
    def parse_subscription_export_csv(csv_content : String)
      rows = CSV.new(csv_content.strip('\n'), headers: true)
      subscriptions = Array(String).new

      # Counter to limit the amount of imports.
      # This is intended to prevent DoS.
      row_counter = 0

      rows.each do |row|
        # Limit to 1200
        row_counter += 1
        break if row_counter > 1_200

        # Channel ID is the first column in the csv export we can't use the header
        # name, because the header name is localized depending on the
        # language the user has set on their account
        channel_id = row[0].strip

        next if channel_id.empty?
        subscriptions << channel_id
      end

      return subscriptions
    end


    def parse_playlist_export_csv(user : User, raw_input : String, filename : String)

      LOGGER.trace("parse_playlist_export_csv: 01 raw_input '#{raw_input}'\n")

      raw_head = ""			# playlists.csv - info-line for a given playlist in the google export, that's copied above the actual playlist-infos and separated by an empty line from
      raw_body = ""			# the actual playlist content, ie. a list of videos

      # remove superfluous \n instances in front of and after any non-\n text.. so .. a trim?
      raw_input = raw_input.strip('\n')

      # Split the input into head and body content
      tmp = raw_input.split("\n\n", limit: 2, remove_empty: true)
      if tmp.size > 1
        raw_head = tmp[0]
        raw_body = tmp[1]
      else
        raw_body = tmp[0]
      end

      LOGGER.trace("parse_playlist_export_csv: 02 raw_head '#{raw_head}' -----\nraw_body '#{raw_body}'\n")

      # TODO create an import-feature (elsewhere), which works for the original google export format, ie. the playlists/ subdirectory content as a ZIP file.. or a complete youtube music export file, but this goes way beyond the scope of a playlist import.

      # defaults
      title = "title not set"                   # TODO i8n
      description = "from #{filename}"          # TODO i8n
      visibility = "Private"
      privacy = PlaylistPrivacy::Private

      if (raw_head != "")

        ### XXX for documentation of current file format of playlists.csv (today it's 2024-01-07):
        #0           1                     2                                 3                    4                       5                      6                         7                                  8                         9                         10                   11
        #Playlist ID,Add new videos to top,Playlist image 1 Create timestamp,Playlist image 1 URL,Playlist image 1 Height,Playlist image 1 Width,Playlist title (original),Playlist title (original) language,Playlist create timestamp,Playlist update timestamp,Playlist video order,Playlist visibility
        #PLc5oiabcabcabcabcabcrOvXgzabcabcm,False,,,,,display name of playlist,,2015-01-01T01:02:03+00:00,2022-10-28T02:23:15+00:00,Manual,Public' ---
        ###/documentation

        # Create the playlist from the head content (if it seems to be valid):
        csv_head = CSV.new(raw_head.strip('\n'), headers: true)
        csv_head.next
        if csv_head[11]
          LOGGER.info("parse_playlist_export_csv: 03.1 raw_head is filled, doing dual csv playlist info scan. google takeout format after october 2023 (format seems to be one line of playlist metadata taken out of the google export's playlists.csv file, added above the actual playlist.csv content, separated by an empty line)")
          title = csv_head[6]
          description = "Playlist was imported from file '#{filename}'\n\nCreated on #{csv_head[8]}\nLast updated on #{csv_head[9]}\n"          # TODO i8n
          visibility = csv_head[11]
        elsif csv_head[6]
          LOGGER.info("parse_playlist_export_csv: 03.2 raw_head is filled, doing dual csv playlist info scan. google takeout format before october 2023 (roughly).")
          title = csv_head[4]
          description = csv_head[5]
          visibility = csv_head[6]
        else                                # we are using the defaults defined above instead.
          LOGGER.info("parse_playlist_export_csv: 03.3 raw_head is filled, but in an unknown format. Using base defaults instead.")
        end

        if visibility.compare("Public", case_insensitive: true) == 0
          privacy = PlaylistPrivacy::Public
        else
          privacy = PlaylistPrivacy::Private
        end
      else					# choose a title from the provided upload filename
        LOGGER.info("parse_playlist_export_csv: 03.2 raw_head is empty. Trying to guess fine-looking title and description from the provided upload filename.")
        if (filename != "")
          title = filename
        end
        nameendpos = filename.rindex(" videos.", filename.size)	# XXX everything up to " videos.csv"
        if !nameendpos				# XXX everything up to file extension
          nameendpos = filename.rindex(".", filename.size)
        end
        if nameendpos
          title = filename[0, nameendpos]
        end
      end

      playlist = create_playlist(title, privacy, user)
      Invidious::Database::Playlists.update_description(playlist.id, description)

      # Add each video to the playlist from the body content
      csv_body = CSV.new(raw_body.strip('\n'), headers: true)
      csv_body.each do |row|
        video_id = row[0]
        if playlist
          next if !video_id
          next if video_id == "Video Id"                    # TODO i8n

          begin
            video = get_video(video_id)
          rescue ex
            next
          end

          playlist_video = PlaylistVideo.new({
            title:          video.title,
            id:             video.id,
            author:         video.author,
            ucid:           video.ucid,
            length_seconds: video.length_seconds,
            published:      video.published,
            plid:           playlist.id,
            live_now:       video.live_now,
            index:          Random::Secure.rand(0_i64..Int64::MAX),
          })

          Invidious::Database::PlaylistVideos.insert(playlist_video)
          Invidious::Database::Playlists.update_video_added(playlist.id, playlist_video.index)
        end
      end

      return playlist
    end

    # -------------------
    #  Invidious
    # -------------------

    # Import from another invidious account
    def from_invidious(user : User, body : String)
      data = JSON.parse(body)

      if data["subscriptions"]?
        user.subscriptions += data["subscriptions"].as_a.map(&.as_s)
        user.subscriptions.uniq!
        user.subscriptions = get_batch_channels(user.subscriptions)

        Invidious::Database::Users.update_subscriptions(user)
      end

      if data["watch_history"]?
        user.watched += data["watch_history"].as_a.map(&.as_s)
        user.watched.reverse!.uniq!.reverse!
        Invidious::Database::Users.update_watch_history(user)
      end

      if data["preferences"]?
        user.preferences = Preferences.from_json(data["preferences"].to_json)
        Invidious::Database::Users.update_preferences(user)
      end

      if playlists = data["playlists"]?.try &.as_a?
        playlists.each do |item|
          title = item["title"]?.try &.as_s?.try &.delete("<>")
          description = item["description"]?.try &.as_s?.try &.delete("\r")
          privacy = item["privacy"]?.try &.as_s?.try { |privacy| PlaylistPrivacy.parse? privacy }

          next if !title
          next if !description
          next if !privacy

          playlist = create_playlist(title, privacy, user)
          Invidious::Database::Playlists.update_description(playlist.id, description)

          videos = item["videos"]?.try &.as_a?.try &.each_with_index do |video_id, idx|
            if idx > CONFIG.playlist_length_limit
              raise InfoException.new("Playlist cannot have more than #{CONFIG.playlist_length_limit} videos")
            end

            video_id = video_id.try &.as_s?
            next if !video_id

            begin
              video = get_video(video_id, false)
            rescue ex
              next
            end

            playlist_video = PlaylistVideo.new({
              title:          video.title,
              id:             video.id,
              author:         video.author,
              ucid:           video.ucid,
              length_seconds: video.length_seconds,
              published:      video.published,
              plid:           playlist.id,
              live_now:       video.live_now,
              index:          Random::Secure.rand(0_i64..Int64::MAX),
            })

            Invidious::Database::PlaylistVideos.insert(playlist_video)
            Invidious::Database::Playlists.update_video_added(playlist.id, playlist_video.index)
          end
        end
      end
    end

    # -------------------
    #  Youtube
    # -------------------

    private def is_opml?(mimetype : String, extension : String)
      opml_mimetypes = [
        "application/xml",
        "text/xml",
        "text/x-opml",
        "text/x-opml+xml",
      ]

      opml_extensions = ["xml", "opml"]

      return opml_mimetypes.any?(&.== mimetype) || opml_extensions.any?(&.== extension)
    end

    # Import subscribed channels from Youtube
    # Returns success status
    def from_youtube(user : User, body : String, filename : String, type : String) : Bool
      extension = filename.split(".").last

      if is_opml?(type, extension)
        subscriptions = XML.parse(body)
        user.subscriptions += subscriptions.xpath_nodes(%q(//outline[@type="rss"])).map do |channel|
          channel["xmlUrl"].match(/UC[a-zA-Z0-9_-]{22}/).not_nil![0]
        end
      elsif extension == "json" || type == "application/json"
        subscriptions = JSON.parse(body)
        user.subscriptions += subscriptions.as_a.compact_map do |entry|
          entry["snippet"]["resourceId"]["channelId"].as_s
        end
      elsif extension == "csv" || type == "text/csv"
        subscriptions = parse_subscription_export_csv(body)
        user.subscriptions += subscriptions
      else
        return false
      end

      user.subscriptions.uniq!
      user.subscriptions = get_batch_channels(user.subscriptions)

      Invidious::Database::Users.update_subscriptions(user)
      return true
    end

    def from_youtube_pl(user : User, body : String, filename : String, type : String) : Bool
      extension = filename.split(".").last

      if extension == "csv" || type == "text/csv"
        playlist = parse_playlist_export_csv(user, body, filename)
        if playlist
          return true
        else
          return false
        end
      else
        return false
      end
    end

    def from_youtube_wh(user : User, body : String, filename : String, type : String) : Bool
      extension = filename.split(".").last

      if extension == "json" || type == "application/json"
        data = JSON.parse(body)
        watched = data.as_a.compact_map do |item|
          next unless url = item["titleUrl"]?
          next unless match = url.as_s.match(/\?v=(?<video_id>[a-zA-Z0-9_-]+)$/)
          match["video_id"]
        end
        watched.reverse! # YouTube have newest first
        user.watched += watched
        user.watched.uniq!
        Invidious::Database::Users.update_watch_history(user)
        return true
      else
        return false
      end
    end

    # -------------------
    #  Freetube
    # -------------------

    def from_freetube(user : User, body : String)
      # Legacy import?
      matches = body.scan(/"channelId":"(?<channel_id>[a-zA-Z0-9_-]{24})"/)
      subs = matches.map(&.["channel_id"])

      if subs.empty?
        profiles = body.split('\n', remove_empty: true)
        profiles.each do |profile|
          if data = JSON.parse(profile)["subscriptions"]?
            subs += data.as_a.map(&.["id"].as_s)
          end
        end
      end

      user.subscriptions += subs
      user.subscriptions.uniq!
      user.subscriptions = get_batch_channels(user.subscriptions)

      Invidious::Database::Users.update_subscriptions(user)
    end

    # -------------------
    #  Newpipe
    # -------------------

    def from_newpipe_subs(user : User, body : String)
      data = JSON.parse(body)

      user.subscriptions += data["subscriptions"].as_a.compact_map do |channel|
        if match = channel["url"].as_s.match(/\/channel\/(?<channel>UC[a-zA-Z0-9_-]{22})/)
          next match["channel"]
        elsif match = channel["url"].as_s.match(/\/user\/(?<user>.+)/)
          # Resolve URL using the API
          resolved_url = YoutubeAPI.resolve_url("https://www.youtube.com/user/#{match["user"]}")
          ucid = resolved_url.dig?("endpoint", "browseEndpoint", "browseId")
          next ucid.as_s if ucid
        end

        nil
      end

      user.subscriptions.uniq!
      user.subscriptions = get_batch_channels(user.subscriptions)

      Invidious::Database::Users.update_subscriptions(user)
    end

    def from_newpipe(user : User, body : String) : Bool
      io = IO::Memory.new(body)

      Compress::Zip::File.open(io) do |file|
        file.entries.each do |entry|
          entry.open do |file_io|
            # Ensure max size of 4MB
            io_sized = IO::Sized.new(file_io, 0x400000)

            next if entry.filename != "newpipe.db"

            tempfile = File.tempfile(".db")

            begin
              File.write(tempfile.path, io_sized.gets_to_end)
            rescue
              return false
            end

            db = DB.open("sqlite3://" + tempfile.path)

            user.watched += db.query_all("SELECT url FROM streams", as: String)
              .map(&.lchop("https://www.youtube.com/watch?v="))

            user.watched.uniq!
            Invidious::Database::Users.update_watch_history(user)

            user.subscriptions += db.query_all("SELECT url FROM subscriptions", as: String)
              .map(&.lchop("https://www.youtube.com/channel/"))

            user.subscriptions.uniq!
            user.subscriptions = get_batch_channels(user.subscriptions)

            Invidious::Database::Users.update_subscriptions(user)

            db.close
            tempfile.delete
          end
        end
      end

      # Success!
      return true
    end
  end # module
end