I'm trying to scrape YouTube Shorts from a specific YouTube Channel, using Puppeteer running on MeteorJs Galaxy.
Here's the code that I've done so far:
import puppeteer from 'puppeteer';
import { YouTubeShorts } from '../imports/api/youTubeShorts'; //meteor mongo local instance
let URL = 'https://www.youtube.com/@ummahtoday1513/shorts'
const processShortsData = (iteratedData) => {
let documentExist = YouTubeShorts.findOne({ videoId:iteratedData.videoId })
if(documentExist === undefined) { //undefined meaning this incoming shorts in a new one
YouTubeShorts.insert({
videoId: iteratedData.videoId,
title: iteratedData.title,
thumbnail: iteratedData.thumbnail,
height: iteratedData.height,
width: iteratedData.width
})
}
}
const fetchShorts = () => {
puppeteer.launch({
headless:true,
args:[
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--single-process'
]
})
.then( async function(browser){
async function fetchingData(){
new Promise(async function(resolve, reject){
const page = await browser.newPage();
await Promise.all([
await page.setDefaultNavigationTimeout(0),
await page.waitForNavigation({waitUntil: "domcontentloaded"}),
await page.goto(URL, {waitUntil:["domcontentloaded", "networkidle2"]}),
await page.waitForSelector('ytd-rich-grid-slim-media', { visible:true }),
new Promise(async function(resolve,reject){
page.evaluate(()=>{
const trialData = document.getElementsByTagName('ytd-rich-grid-slim-media');
const titles = Array.from(trialData).map(i => {
const singleData = {
videoId: i.data.videoId,
title: i.data.headline.simpleText,
thumbnail: i.data.thumbnail.thumbnails[0].url,
height: i.data.thumbnail.thumbnails[0].height,
width: i.data.thumbnail.thumbnails[0].width,
}
return singleData
})
resolve(titles);
})
}),
])
await page.close()
})
await browser.close()
}
async function fetchAndProcessData(){
const datum = await fetchingData()
console.log('DATUM:', datum)
}
await fetchAndProcessData()
})
}
fetchShorts();
I am struggling with two things here:
- Async, await, and promises, and
- Finding reason behind why Puppeteer output the
ProtocolError: Protocol error (Target.createTarget): Target closed.error in the console.
I'm new to puppeteer and trying to learn from various examples on StackOverflow and Google in general, but I'm still having trouble getting it right.