From fea0313dcc04911081c1b0b4800ff15edb397701 Mon Sep 17 00:00:00 2001 From: udhayarajan Date: Sun, 6 Aug 2023 10:57:24 +0530 Subject: [PATCH 1/3] fix(Instagram): crashes with zero index and getOwnerID() --- build.gradle.kts | 2 +- .../mugames/vidsnapkit/extractor/Instagram.kt | 20 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 7a3b8230..586ae9b7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -28,7 +28,7 @@ plugins { } group = "io.github.udhayarajan" -version = "5.7.0" +version = "5.7.1" // Version Naming incremented if ".." // Priority on incrementing Feature > BugFix > Beta diff --git a/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Instagram.kt b/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Instagram.kt index 337b474b..58a11fff 100644 --- a/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Instagram.kt +++ b/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Instagram.kt @@ -89,7 +89,7 @@ class Instagram internal constructor(url: String) : Extractor(url) { } private fun getShortcode(): String? { - val matcher = Pattern.compile("(?:reel|reels|p)/(.*?)[/?]").matcher(inputUrl) + val matcher = Pattern.compile("(?:reel|reels|p|tv)/(.*?)[/?]").matcher(inputUrl) return if (matcher.find()) matcher.group(1) else { logger.error("unable to find shortcode from the url=$inputUrl") null @@ -129,7 +129,7 @@ class Instagram internal constructor(url: String) : Extractor(url) { throw Exception("unable to get audio ID") } - private fun getOwnerID(page: String): String { + private fun getOwnerID(page: String): String? { val regexes = listOf("instapp:owner_user_id\" content=\"(\\d*?)\"".toRegex(), "owner_id\":\"(\\d*?)\"".toRegex()) for (r in regexes) { @@ -137,7 +137,7 @@ class Instagram internal constructor(url: String) : Extractor(url) { if (res != null) return res } - throw Exception("unable to get owner ID") + return null } private fun isPostUrl(): Boolean { @@ -274,6 +274,9 @@ class Instagram internal constructor(url: String) : Extractor(url) { extractHighlights(it) } } else if (inputUrl.contains("audio")) { + if (!isCookieValid()) { + cookies = null + } inputUrl = inputUrl.replace("/reel/", "/reels/") formats.url = inputUrl val audioID = getAudioID() @@ -367,13 +370,16 @@ class Instagram internal constructor(url: String) : Extractor(url) { } headers["User-Agent"] = "Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Instagram 105.0.0.11.118 (iPhone11,8; iOS 12_3_1; en_US; en-US; scale=2.00; 828x1792; 165586599)" - val ownerId = getOwnerID(page) + val ownerId = getOwnerID(page) ?: run { + loginRequired() + return + } val mediaId = getMediaId(page) - val response = httpRequestService.headRawResponse(MEDIA_CONTENT_LOGGED_OUT.format(mediaId, ownerId)) ?: run { + val response = httpRequestService.getRawResponse(MEDIA_CONTENT_LOGGED_OUT.format(mediaId, ownerId), headers) ?: run { clientRequestError() return } - val guestCookies = response.headers.getAll("set-cookies") ?: run { + val guestCookies = response.headers.getAll("set-cookie") ?: run { logger.info("no cookies") clientRequestError() return @@ -720,7 +726,7 @@ class Instagram internal constructor(url: String) : Extractor(url) { private suspend fun tryWithQueryHash(page: String, directExNeeded: Boolean = true) { val queryHash = withTimeoutOrNull(3000) { - getQueryHashFromAllJSInPage(page)[0] ?: DEFAULT_QUERY_HASH + getQueryHashFromAllJSInPage(page).getOrNull(0) ?: DEFAULT_QUERY_HASH } ?: DEFAULT_QUERY_HASH val appID = getAppID(page) headers["X-Ig-App-Id"] = appID From 58fbe7780a0e64530260676c92f53557ec570da0 Mon Sep 17 00:00:00 2001 From: udhayarajan Date: Sun, 6 Aug 2023 13:21:28 +0530 Subject: [PATCH 2/3] fix(Facebook): fix title extraction, and some internal crashes --- .../mugames/vidsnapkit/extractor/Facebook.kt | 113 +++++++++++------- 1 file changed, 68 insertions(+), 45 deletions(-) diff --git a/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt b/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt index 79c0fd08..c1c1aaeb 100644 --- a/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt +++ b/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt @@ -123,6 +123,25 @@ class Facebook internal constructor(url: String) : Extractor(url) { scratchWebPage(page) } + private fun getTitle(webPage: String): String { + fun extractTitle(vararg regexes: Regex, default: String = ""): String { + for (regex in regexes) { + val m = Pattern.compile(regex.toString()).matcher(webPage) + if (m.find()) { + return decodeHTML(m.group(1)!!).toString() + } + } + return default + } + + return extractTitle( + Regex("(?:true|false),\"name\":\"(.*?)\",\"savable"), + Regex("<[Tt]itle id=\"pageTitle\">(.*?) \\| Facebook<\\/title>"), + Regex("title\" content=\"(.*?)\""), + default = "Facebook_Video" + ) + } + private suspend fun scratchWebPage(webPage: String) { onProgress(Result.Progress(ProgressState.Middle)) var serverJsData: String? = null @@ -192,17 +211,6 @@ class Facebook internal constructor(url: String) : Extractor(url) { } } - fun extractTitle(vararg regexes: Regex, default: String = "") { - for (regex in regexes) { - m = Pattern.compile(regex.toString()).matcher(webPage) - if (m.find()) { - localFormats.title = decodeHTML(m.group(1)!!).toString() - return - } - localFormats.title = default - } - } - if (localFormats.imageData.isEmpty()) extractThumbnail( Regex("\"thumbnailImage\":\\{\"uri\":\"(.*?)\"\\}"), Regex("\"thumbnailUrl\":\"(.*?)\""), @@ -210,13 +218,7 @@ class Facebook internal constructor(url: String) : Extractor(url) { ) if (localFormats.title.isEmpty() || localFormats.title == "null") { - extractTitle( - Regex("(?:true|false),\"name\":\"(.*?)\",\"savable"), - Regex("<[Tt]itle id=\"pageTitle\">(.*?) \\| Facebook<\\/title>"), - Regex("title\" content=\"(.*?)\""), - - default = "Facebook_Video" - ) + localFormats.title = getTitle(webPage) } if (videoFormats.isEmpty()) { videoFormats.add(localFormats) @@ -245,6 +247,7 @@ class Facebook internal constructor(url: String) : Extractor(url) { } private fun grabRelayPrefetchedDataSearchUrl(webpage: String): Any? { + localFormats.title = getTitle(webpage) fun parseAttachment(attachment: JSONObject?, key: String): Formats? { val media = attachment?.getNullableJSONObject(key) media?.let { @@ -257,7 +260,11 @@ class Facebook internal constructor(url: String) : Extractor(url) { return null } - val data = grabRelayPrefetchedData(webpage, arrayOf("\"dash_manifest\"", "\"playable_url\"")) + val data = + grabRelayPrefetchedData( + webpage, + arrayOf("\"dash_manifest\"", "\"playable_url\"", "\"browser_native_", "\"photo_image\"") + ) data?.let { var nodes = it.getNullableJSONArray("nodes") var node = it.getNullableJSONObject("node") @@ -366,8 +373,10 @@ class Facebook internal constructor(url: String) : Extractor(url) { ) return searchFromRequireArray( array.getJSONArray(3).getJSONObject(0).getJSONObject("__bbox").getJSONArray("require") ) - if (array.getString(0).contains("RelayPrefetchedStreamCache")) return array.getJSONArray(3) - .getJSONObject(1).getJSONObject("__bbox").getJSONObject("result").getJSONObject("data") + if (array.getString(0).contains("RelayPrefetchedStreamCache")) { + return array.getJSONArray(3) + .getJSONObject(1).getJSONObject("__bbox").getJSONObject("result").getJSONObject("data") + } if (array.getString(0).contains("ScheduledServerJSWithServer")) return searchFromRequireArray( array.getJSONObject(0).getJSONObject("__box").getJSONArray("require") ) @@ -408,8 +417,6 @@ class Facebook internal constructor(url: String) : Extractor(url) { title?.let { if (scopedFormats.title.isEmpty() || it != scopedFormats.title) scopedFormats.title = title - } ?: run { - if (scopedFormats.title.isEmpty()) scopedFormats.title = "Facebook_Video" } val dashXml = media.getNullableString("dash_manifest") @@ -445,33 +452,49 @@ class Facebook internal constructor(url: String) : Extractor(url) { return scopedFormats } - private fun parseGraphqlImage(media: JSONObject): Formats? { + private fun parseGraphqlImage(media: JSONObject): Formats { val scopedFormats = localFormats.copy( - title = "", videoData = mutableListOf(), audioData = mutableListOf(), imageData = mutableListOf() + videoData = mutableListOf(), audioData = mutableListOf(), imageData = mutableListOf() ) - val image = media.getJSONObject("image") - scopedFormats.imageData.add( - ImageResource( - image.getString("uri"), resolution = "${image.get("width")}x${image.get("height")}" - ) - ) + val caption = media.getNullableString("accessibility_caption") + if (!caption.isNullOrBlank()) { + scopedFormats.title = caption + } - val blurredImage = media.getNullableJSONObject("blurred_image") - blurredImage?.getString("uri")?.let { - ImageResource( - it, resolution = Util.getResolutionFromUrl(blurredImage.getString("uri")) - ) - }?.let { - scopedFormats.imageData.add(it) + fun addImage(imgObject: JSONObject) { + val height = imgObject.getNullable("height") + val width = imgObject.getNullable("width") + var res = "" + if (height != null && width != null) { + res = width + "x" + height + } + val uri = imgObject.getNullableString("uri") + uri?.let { + scopedFormats.imageData.add( + ImageResource( + it, + resolution = res.ifEmpty { Util.getResolutionFromUrl(imgObject.getString("uri")) } + ) + ) + } } - val previewImage = media.getJSONObject("previewImage") - scopedFormats.imageData.add( - ImageResource( - previewImage.getString("uri"), resolution = Util.getResolutionFromUrl(previewImage.getString("uri")) - ) + fun getImages(vararg keywords: String) { + for (keyword in keywords) { + val imgObj = media.getNullableJSONObject(keyword) + imgObj?.let { addImage(it) } + } + } + + getImages( + "image", + "blurred_image", + "previewImage", + "viewer_image", + "photo_image" ) + return scopedFormats } @@ -498,7 +521,7 @@ class Facebook internal constructor(url: String) : Extractor(url) { } val scopedFormats = localFormats.copy( - title = "", videoData = mutableListOf(), audioData = mutableListOf(), imageData = mutableListOf() + videoData = mutableListOf(), audioData = mutableListOf(), imageData = mutableListOf() ) var xmlDecoded = xml.replace("x3C".toRegex(), "<") @@ -587,7 +610,7 @@ class Facebook internal constructor(url: String) : Extractor(url) { videoData.get("original_width") .toString() + "x" + videoData.get("original_height") + "(" + s.uppercase() + ")", - ) + ) ) } return SUCCESS From 236cc84cd103dd1961ddcb6717843dd408e093c1 Mon Sep 17 00:00:00 2001 From: udhayarajan Date: Sun, 6 Aug 2023 13:54:18 +0530 Subject: [PATCH 3/3] fix: lint --- .../kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt b/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt index c1c1aaeb..0ec43caa 100644 --- a/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt +++ b/src/commonMain/kotlin/com/mugames/vidsnapkit/extractor/Facebook.kt @@ -610,7 +610,7 @@ class Facebook internal constructor(url: String) : Extractor(url) { videoData.get("original_width") .toString() + "x" + videoData.get("original_height") + "(" + s.uppercase() + ")", - ) + ) ) } return SUCCESS