First main working coding attempt
First main working coding attempt, explanation in 'comments and last post
Code:
Sub GetStuffFrom11DigitYouTube()
Rem 0 Worksheets info
Dim WsYT11 As Worksheet: Set WsYT11 = ThisWorkbook.Worksheets("ElevenDigitYT")
Dim RngWsYT11 As Range: Set RngWsYT11 = WsYT11.Range("A1:A1008")
Dim Unics As String
Dim Cnt As Long
For Cnt = 2 To 1009
If InStr(1, Unics, RngWsYT11.Item(Cnt).Value2, vbBinaryCompare) = 0 Then ' Check to see if I not got this the 11 digit bit yet ..... but there may be duplicates for the list from each text file as I click a bit above the last link, say on 76th link ,rather than the typical last 79th link, ( if I clicked on the 79th link I would get the unique 11 digit bit for the 79th video twice from the first two text files, by clicking on the 76th link I will get in both the first two text files the 11 digit bit for the 76th 77th 78th and 79th )
Let RngWsYT11.Item(Cnt).Offset(0, 1).Value2 = RngWsYT11.Item(Cnt).Value2 ' Puts the 11 digit bit in column B if i have not had that 11 digit bit yet
Let Unics = Unics & RngWsYT11.Item(Cnt).Value2 & " " ' Put the 11 digit bit in a string which i check to see if i got this one already
Else
' already got this 11 digit bit, so leave the row empty
End If
Next Cnt
Rem 1 The main stuff now - take every 11 digit bit , make a full link of it, scrape all the text and do all the business
For Cnt = 2 To 1009
If RngWsYT11.Item(Cnt).Offset(0, 1).Value2 <> "" Then
With CreateObject("MSXML2.ServerXMLHTTP")
.Open "GET", "https://www.youtube.com/watch?v=" & RngWsYT11.Item(Cnt).Offset(0, 1).Value2, False ' '
.setRequestHeader "User-Agent", "Chrome" ' https://eileenslounge.com/viewtopic.php?p=303639#p303639
.send ' varBody:= ' No extra info for type GET. .send actually makes the request
While .readyState <> 4: DoEvents: Wend ' Allow other processes to run while the web page loads. Think this is part of the True option
Dim PageSrc As String: Let PageSrc = .responseText ' Save the HTML code in the (Global) variable. ': Range("P1").Value = PageSrc 'For me for a print out copy to text file etc. The responseText property returns the information requested by the Open method as a text string
End With
'_1(ii) Optional secion to put the text string into a text file , for ease of code developments and debugging
Dim FileNum2 As Long: Let FileNum2 = FreeFile(0) ' https://msdn.microsoft.com/en-us/vba/language-reference-vba/articles/freefile-function
Dim PathAndFileName2 As String
Let PathAndFileName2 = ThisWorkbook.Path & "\" & "WieGehtsYouTubeServerChrome" & RngWsYT11.Item(Cnt).Offset(0, 1).Value2 & ".txt" ' "WieGehtsYouTubeServerChrome526" & ".txt" ' "WieGehtsYouTubeServerChrome451" & ".txt" ' "WieGehtsYouTubeServerChrome376" & ".txt" ' "WieGehtsYouTubeServerChrome301" & ".txt" ' "WieGehtsYouTubeServerChrome226" & ".txt" ' "WieGehtsYouTubeServerChrome151" & ".txt" ' "WieGehtsYouTubeServerChrome76" & ".txt" ' "WieGehtsYouTubeServerChrome1" & ".txt" '
Open PathAndFileName2 For Output As #FileNum2 ' ' The text file will be made if not there, and if it is there and already contains data, then the data will be overwritten
Print #FileNum2, PageSrc '
Close #FileNum2
' If RngWsYT11.Item(Cnt).Offset(0, 1).Value2 = "GWGzz5p5D80" Then Stop In case I want to stop at a particular 11 digit bit for debugging
Dim TextBit As String ' The idea is to get a chunk of text that seems to have all the info I want
Let TextBit = Split(PageSrc, "videoDescriptionHeaderRenderer""", 2, vbBinaryCompare)(1)
'Let TextBit = Mid(TextBit, 1, 600)
Let TextBit = Mid(TextBit, 1, 1400)
Dim Title As String
' Let Title = Split(TextBit, ":{""title"":{""runs"":[{""text"":""", 2, vbBinaryCompare)(1)
' 'Let Title = Split(Title, """}]},""channel""", 2, vbBinaryCompare)(0)
' Let Title = Split(Title, """}", 2, vbBinaryCompare)(0)
' If InStr(1, TextBit, """,""navigationEndpoint""", vbBinaryCompare) <> 0 Then
' Let Title = Split(TextBit, ":{""title"":{""runs"":[{""text"":""", 2, vbBinaryCompare)(1)
' Dim Pos1nav As Long, Pos2nav As Long
' Let Pos1nav = InStr(1, Title, """,""navigationEndpoint""", vbBinaryCompare)
' Let Pos2nav = InStr(Pos1nav, Title, "{""text"":""", vbBinaryCompare)
' Let Title = Left(Title, Pos1nav - 1) & Right(Title, Len(Title) - (Pos2nav + 8))
' Let Title = Split(Title, """}", 2, vbBinaryCompare)(0)
' Else
' End If
' That above checked for link text crap, but only worked if the link was at the beginning of the title: If the link was inside it or at the end, we will end up with multiple text bits, (and the link text crap) I need to join together and ignore link text crap - BTW "link text crap" is what comes after the text seen for the link which is needed to make it work, but i don't want it .... Text1 .. Textseenforlink .. textcrapneededtomakelinkwork ... Text3 etc
Dim posTxtTag As Long ' Usually there will be just one bit of {"text":" after which come the title, but sometimes there may be a few as in the case of a link in the title..
Let posTxtTag = InStr(1, TextBit, "{""text"":""", vbBinaryCompare)
Do While posTxtTag <> 0 And posTxtTag < InStr(1, TextBit, """channel""", vbBinaryCompare) ' hopefully the text channel always comes after the title and before anything else I want
Dim posTxtTagEnd As Long, posTxtTagEnd2 As Long
Let posTxtTagEnd = InStr(posTxtTag, TextBit, """,""navigationEndpoint", vbBinaryCompare) ' This gives me the end of the text for the case of a link text
If posTxtTagEnd = 0 Then Let posTxtTagEnd = 999 ' If I ain't got a link then I make this big so it does not get used
Let posTxtTagEnd2 = InStr(posTxtTag, TextBit, """}", vbBinaryCompare) ' mostly this would be the end of a text bit
Let posTxtTagEnd = Application.WorksheetFunction.Min(posTxtTagEnd, posTxtTagEnd2) ' Whatever text end I have or watever text end comes first will be used
Let Title = Title & Mid(TextBit, posTxtTag + 9, (posTxtTagEnd - (posTxtTag + 9))) ' mostly this would only be done once, just more times for building the text if its split into bits by the use of a link in the title
Let posTxtTag = InStr(posTxtTagEnd, TextBit, "{""text"":""", vbBinaryCompare) ' This will mostly be 0, unless text is split into bits by the use of a link in the title
Loop ' While posTxtTag <> 0
Let RngWsYT11.Item(Cnt).Offset(0, 2).Value2 = Title
Let Title = "" ' If i don't do this the Title = Title & coding will keep adding the titles together
RngWsYT11.Item(Cnt).Offset(0, 2).Select ' This is helpful or else I cant see the worksheet being filled after the first few
' To get the next info i use a lot the Split Split bit Yasser showed me https://eileenslounge.com/viewtopic.php?p=303638#p303638
Dim Views As String
Let Views = Split(TextBit, """},""views"":{""simpleText"":""", 2, vbBinaryCompare)(1)
Let Views = Split(Views, " Aufrufe""}", 2, vbBinaryCompare)(0)
Let Views = Replace(Views, ".", "", 1, -1, vbBinaryCompare) ' I don't like seperators of any kind
Let RngWsYT11.Item(Cnt).Offset(0, 4).Value2 = Views
Dim PubDate As String
Let PubDate = Split(TextBit, "publishDate"":{""simpleText"":""", 2, vbBinaryCompare)(1)
Let PubDate = Split(PubDate, """},""factoid"":[{", 2, vbBinaryCompare)(0)
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value2 = PubDate
' date nightmares again
Dim PubDateV2 As Long, Naw As Long ' DateSerial(year, month, day)
Dim PubDateRaw As String: Let PubDateRaw = Replace(PubDate, "Premiere am ", "", 1, 1, vbBinaryCompare)
Let PubDateV2 = DateSerial(Right(PubDateRaw, 4), Mid(PubDateRaw, 4, 2), Left(PubDateRaw, 2))
Let Naw = Evaluate("=NOW()"): ' Debug.Print Naw
' Debug.Print Format(PubDateV2, "\Da\y i\s " & "dddd")
' Debug.Print Format(PubDateV2, "dddd DD mmm YYYY") & " " & Format(Naw, "dddd DD mmm YYYY")
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value2 = PubDateV2
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value = PubDate
Let RngWsYT11.Item(Cnt).Offset(0, 3).Value = Format(PubDateV2, "dddd DD mmm YYYY")
Let RngWsYT11.Item(Cnt).Offset(0, 5).Value = Int(Views / (Naw - PubDateV2))
Dim Likeses As String
Let Likeses = Split(TextBit, "Mag ich\""-Bewertungen""},""accessibilityText"":""", 2, vbBinaryCompare)(1)
Let Likeses = Split(Likeses, " \""Mag ich\""-Bewertungen", 2, vbBinaryCompare)(0)
If InStr(1, Likeses, "Keine", vbBinaryCompare) = 0 Then ' Sometimes the likes are not there or not showmn or something - only happend in a video not from the main author
'Let RngWsYT11.Item(Cnt).Offset(0, 6).Value = Likeses
Let RngWsYT11.Item(Cnt).Offset(0, 7).Value = Int(Likeses / (Naw - PubDateV2))
Else
Let Likeses = "Keine"
End If
Let RngWsYT11.Item(Cnt).Offset(0, 6).Value = Likeses
RngWsYT11.Parent.Columns("A:H").AutoFit
Else
End If
Next Cnt
End Sub
VBA arrays coding to match up 2 slightly different Titles of same Video in 2 lists
This is post #98 #post19705
https://excelfox.com/forum/showthrea...ll=1#post19705 ???
https://excelfox.com/forum/showthrea...ge10#post19705
Final video Folder Video order
I need to do some sorting. Order sorting, for example based on date, and / or add a date “stamp” …
The problem is that my final modified titles on the actual videos will not tie up perfectly with those got from the scrapping coding. (But I note here that as time goes on the two will get closer, as I tend to apply more and more adjusting at the initial scrapping level , based on the modifications later that I find I need to do )
I spent a few frustrating days trying to modify an efficient range.Find to do this, but it did not really do it without being very complicated so wasting the otherwise efficient way the range.Find seems to work.
Finally a simpler VBA array coding seems OK for now, so I need a Simple VBA arrays coding to match up two Title list where the Titles for a particular video may be slightly different in the two lists
VBA arrays coding to match up 2 slightly different Titles of same Video in 2 lists
The function takes in …
__ ( The value ( Title text ) to be looked for , the range (as range object) to be looked in ) returning the found cell as range object
The basic idea is to split the Title to be looked for by a space so we have a one dimensional array of all the words in that title, and then we try to find a title in the list to be searched that has at least a certain number , HitsWish , of those words. That number is determined empirically , and if that amount of number is not found we keep reducing the HitsWish. So we do our best to match as many words as possible. That way we improve the chances of matching the right one
Rem 1 This does some tidying of given Title text value to be looked for. This follows along the usual typical tidying up that I do of Titles, but as noted, as time goes on the two titles for the same video will get closer, as I tend to apply more and more adjusting at the initial scrapping level , based on the modifications later that I find I need to do
Rem 2 is a bit of a customized fiddle thing. This is because there is a chance the first few words would never be found, even later, as I might have had some number or other ordering text added at the start. So I make an adjustment fiddle thing and variable that means I don’t include those words. Its basically based on a quick more conventional Range.Find thing to see if the first word(s) are anywhere in the list. If not then from then on we never try those.
Rem 3 is the main searching Loop(s). Three of them, in a nested fashion.
_ The outer loop , loops backwards if necessary to reduce the number of words we try to find in a single title in the list to be searched for a match.
_ The next Loop inside is looping all the rows in the list of titles to be searched for a match
_ The inner most loop goes through all (or most depending on what happened in Rem 2 ) of the words from the title to be looked for, and if we reach the required number of hits, HitsWish , then we terminate after passing the found cell to the result of the function, This is the bit that does that…
Set TitlSrch = SrchClm.Item(Rw)
Here is the initial final macro, at this stage :
Code:
' This function is a VBA array looping thing. It tidies up a bit a string Title you give it in the typical way i might tidy up a title. Then it splits that by spaces, and tries to match a lot of words from that in a Title in the given range of Titles. It reduces the amount of words it tries to match until it finds a match or gives up and tells you it never managed it
Public Function TitlSrch(TrgtVal As String, SrchClm As Range) As Range 'TrgtVal is range selected value, SrchClm is LookUpTable The return is the found cell range
Let Application.EnableEvents = True
Rem 1 Some tidying of given string.
' 1a) This follows along the usual typical tyding up that I do of Titles
Dim SchTxt As String: Let SchTxt = Trim(TrgtVal) ' Trim(Trgt.Value)
' Initial check for if multi words if there are spaces
If InStr(1, SchTxt, " ", vbBinaryCompare) > 0 Then ' Check for more than 1 word to look for==============
' Remove all but single space in between words to allow split via a space
Let SchTxt = Evaluate("=TRIM(SUBSTITUTE(" & """" & SchTxt & """" & ",CHAR(160),CHAR(32)))") ' TRIM function trims the 7-bit ASCII space character (value 32). In the Unicode character set, there is an additional space character called the nonbreaking space character that has a decimal value of 160. This character is commonly used in Web pages as the HTML entity, . By itself, the TRIM function does not remove this nonbreaking space character. https://www.excelforum.com/excel-formulas-and-functions/1217202-is-there-a-function-similar-to-trim-but-that-only-removes-trailing-spaces-2.html Note also that spaces more than 1 are removed from in between text
' Do some empirical text tidying up that I might typically have done in a final video title
Let SchTxt = Replace(SchTxt, "ä", "ae", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "ü", "ue", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "ö", "oe", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "Ä", "AE", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "Ü", "UE", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "Ö", "OE", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "-", " ", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "#wiegehtyoutube", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "!", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "?", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "ß", "ss", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "€", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, ":", " ", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "#", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "&", " ", 1, -1, vbBinaryCompare) '
Let SchTxt = Replace(SchTxt, "'", " ", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "‚", " ", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, """", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "“", " ", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "„", " ", 1, -1, vbBinaryCompare) ' „ajdffak“
Let SchTxt = Replace(SchTxt, "+", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, ".", "", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "YouTube", "UT", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "[", "(", 1, -1, vbBinaryCompare)
Let SchTxt = Replace(SchTxt, "]", ")", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(SchTxt, "-", " ", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(SchTxt, "-", " ", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(SchTxt, "-", " ", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(SchTxt, "-", " ", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(SchTxt, "-", " ", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(schtxt, "-", " ", 1, -1, vbBinaryCompare)
' Let SchTxt = Replace(schtxt, "-", " ", 1, -1, vbBinaryCompare)
Let SchTxt = Application.WorksheetFunction.Trim(SchTxt) ' In case any spaces caused by removing stuff, as we still just want one space between for splitting
Dim SchPts() As String: Let SchPts() = VBA.Strings.Split(SchTxt, " ", -1) ' Do split on single space to get multiple words
'1b) removing of some words, for example if they are very common, or small
Dim strNew As String
Dim Cnt As Long
For Cnt = LBound(SchPts()) To UBound(SchPts())
If Len(SchPts(Cnt)) < 5 Then
' ignore short words
Else
Select Case SchPts(Cnt)
Case "YouTube", "Youtube"
' ignore those words
Case Else
Let strNew = strNew & SchPts(Cnt) & " " ' Building string from any nmot ignored words
End Select
End If
Next Cnt
Let strNew = Left(strNew, Len(strNew) - 1) ' Take off last space
If InStr(1, strNew, " ", vbBinaryCompare) = 0 Then GoTo SS ' go to Look for just one word
Let SchPts() = VBA.Strings.Split(strNew, " ", -1)
Dim HitsWish As Long: Let HitsWish = 6 ' The most number of words we must find to make it as sucess before trying less words. Set this empirically
Rem 2 checking to find at least one word
Dim Adj As Long ' This is used to add to array index if first word was not found
For Cnt = 0 To UBound(SchPts())
Dim FndCel As Range ' used for result of any search
Set FndCel = SrchClm.Find(what:=SchPts(0 + Adj), LookIn:=xlFormulas, Lookat:=xlPart, searchorder:=xlByRows, searchdirection:=xlNext, MatchCase:=False)
If FndCel Is Nothing Then
Let Adj = Adj + 1
If Adj > UBound(SchPts()) Then MsgBox Prompt:="Cant find any words in " & vbCr & vbLf & """" & TrgtVal & """" & vbCr & vbLf & """" & strNew & """": Debug.Print "Cant find any words in " & vbCr & vbLf & """" & TrgtVal & """" & vbCr & vbLf & """" & strNew & """": GoTo TheEnd
Else
Exit For
End If
Next Cnt
If Adj = UBound(SchPts()) Then Let SchTxt = SchPts(UBound(SchPts())): GoTo SS ' We only got one word to look for, the last word in our array of words to look for, the only word that was found actually, so going to SS here means I will look for it again there, nevermind
' At this point we have at least two words and hopefully at least 4. Normally we would have a total of UBound(SchPts())+1. But this will be reduced by Adj
' If UBound(SchPts()) + 1 - Adj = 6 Then Let HitsWish = 6 ' We only have 6 words to search for
If UBound(SchPts()) + 1 - Adj = 5 Then Let HitsWish = 5 ' We only have 5 words to search for
If UBound(SchPts()) + 1 - Adj = 4 Then Let HitsWish = 4 ' We only have 4 words to search for
If UBound(SchPts()) + 1 - Adj = 3 Then Let HitsWish = 3 ' We only have three words to search for
If UBound(SchPts()) + 1 - Adj = 2 Then Let HitsWish = 2 ' We only have two words to search for
Rem 3 Check for Hits wanted, or less
Dim arrSrchClm() As Variant: Let arrSrchClm() = SrchClm.Value
Dim Hits
For Hits = HitsWish To 1 Step -1 ' If we dont jump out of the loop, then we reduce the hit cpount goal and try again
Dim Rw As Long
For Rw = 1 To UBound(arrSrchClm(), 1)
Dim CntHit As Long: Let CntHit = 0
For Cnt = 0 + Adj To UBound(SchPts())
If InStr(1, arrSrchClm(Rw, 1), SchPts(Cnt), vbBinaryCompare) > 0 Then
Let CntHit = CntHit + 1
If CntHit = Hits Then Debug.Print Hits & " Hits for """ & SrchClm.Item(Rw).Value & """": Set TitlSrch = SrchClm.Item(Rw): GoTo TheEnd
Else
End If
Next Cnt
Next Rw
Next Hits
Debug.Print "Nothing": Exit Function
SS: ' Do not put this before the Else idiot!
Set FndCel = SrchClm.Find(what:=VBA.Strings.Trim(SchTxt), LookIn:=xlFormulas, Lookat:=xlPart, searchorder:=xlByRows, searchdirection:=xlNext, MatchCase:=False) ' I might sometimes be doing this twice. Nevermind
If Not FndCel Is Nothing Then
Set TitlSrch = FndCel: Let Hits = 1: GoTo TheEnd
Else
' Selection.Offset(0, 2).Select: GoTo TheEnd '''_- Exit Function ' case no single word match
End If
End If ' Finished case single word to look for or multiple words to look for=============================
TheEnd: '''_- Exit Function
' GoToEmptyCellNearby
Let Application.EnableEvents = True
EndFuk:
End Function
In the next post I will add some notes for the first few actual uses of that, ( and may then edit the above function coding a bit as I go along )