Extract Title Terms from Google
Wed Dec 13 2023 01:05:53 GMT+0000 (Coordinated Universal Time)
Saved by @richtatum #seo #javascript #js #google #serp
// Extract distinctive title terms from a Google results page.
//
// Pipeline: download an English stop-word list, download the results page for
// `search`, collect the text of every <h3> in it, drop stop words and the most
// frequent words, then log each remaining multi-word phrase once.
//
// Needs a browser-like environment: it relies on the global `fetch` and
// `DOMParser`.
// NOTE(review): requesting google.com/search from a page on another origin is
// presumably blocked by CORS, so this likely has to run from a google.com tab;
// confirm.

// The phrase that will be searched for (as an exact, quoted query).
const search = "Google * and SEO";

// Raw list of English stop words (NLTK) hosted on GitHub.
const stopwordsUrl = "https://gist.githubusercontent.com/sebleier/554280/raw/7e0e4a1ce04c2bb7bd41089c9821dbcf6d0c786c/NLTK's%20list%20of%20english%20stopwords";

// Everything that is not a letter, combining mark, digit or whitespace.
// Unicode-aware so that non-ASCII letters (e.g. "é") survive; underscores and
// punctuation are removed.
const NON_WORD_CHARS = /[^\p{L}\p{M}\p{N}\s]/gu;

/**
 * Build the Google search URL for an exact-phrase query.
 *
 * The query is wrapped in double quotes and encoded by `URLSearchParams`, so
 * every space (not only the first one) and every reserved character is
 * escaped correctly.
 *
 * @param {string} query - Phrase to search for.
 * @param {number} [limit=1000] - Value sent as the `num` parameter.
 * @returns {string} The complete search URL.
 */
function buildSearchUrl(query, limit = 1000) {
  const url = new URL('https://www.google.com/search');
  url.searchParams.set('q', `"${query.trim()}"`);
  url.searchParams.set('num', String(limit));
  return url.toString();
}

/**
 * Split a text into lower-case words with punctuation removed.
 *
 * @param {string} text - Raw text, e.g. a result title.
 * @returns {string[]} The words in order; never contains empty strings.
 */
function tokenize(text) {
  return text
    .trim()
    .toLowerCase()
    .replace(NON_WORD_CHARS, '')
    .split(/\s+/)
    // Stripping punctuation can leave leading whitespace (or nothing at all),
    // which makes split() produce '' entries that must not count as words.
    .filter((word) => word !== '');
}

/**
 * Parse a newline-separated stop-word list.
 *
 * Each entry goes through `tokenize`, so it has the same shape as the title
 * words it is compared against ("don't" becomes "dont").
 *
 * @param {string} stopwordsData - One stop word per line.
 * @returns {Set<string>} The normalised stop words.
 */
function parseStopWords(stopwordsData) {
  return new Set(stopwordsData.split(/\n/).flatMap((line) => tokenize(line)));
}

/**
 * Count how often each word occurs across all word lists.
 *
 * A Map is used instead of a plain object so that words such as
 * "constructor" cannot collide with inherited Object.prototype properties.
 *
 * @param {string[][]} wordLists - One list of words per title.
 * @returns {Map<string, number>} Occurrences per word.
 */
function countWords(wordLists) {
  const counts = new Map();
  for (const word of wordLists.flat()) {
    counts.set(word, (counts.get(word) ?? 0) + 1);
  }
  return counts;
}

/**
 * Find the words whose count is strictly above the count located at the given
 * percentile of the ascending list of counts.
 *
 * @param {Map<string, number>} wordCounts - Occurrences per word.
 * @param {number} percentile - Position in the sorted counts, 0..1.
 * @returns {Set<string>} Words considered too frequent.
 */
function findFrequentWords(wordCounts, percentile) {
  const sortedCounts = [...wordCounts.values()].sort((a, b) => a - b);
  // Undefined when there are no words (or percentile is 1); every comparison
  // below is then false and no word is treated as frequent.
  const thresholdValue = sortedCounts[Math.floor(sortedCounts.length * percentile)];
  const frequent = new Set();
  for (const [word, count] of wordCounts) {
    if (count > thresholdValue) frequent.add(word);
  }
  return frequent;
}

/**
 * Reduce a list of titles to their distinctive multi-word phrases.
 *
 * @param {string[]} titles - Raw title texts.
 * @param {Set<string>} stopWordSet - Words to ignore (already normalised).
 * @param {number} [percentile=0.98] - Percentile passed to `findFrequentWords`.
 * @returns {string[]} Unique phrases of at least two words, in first-seen order.
 */
function extractTitleTerms(titles, stopWordSet, percentile = 0.98) {
  const wordLists = titles.map((title) =>
    tokenize(title).filter((word) => !stopWordSet.has(word))
  );
  const frequentWords = findFrequentWords(countWords(wordLists), percentile);
  const phrases = wordLists
    .map((words) => words.filter((word) => !frequentWords.has(word)))
    .filter((words) => words.length > 1)
    .map((words) => words.join(' '));
  return [...new Set(phrases)];
}

/**
 * Download a URL and return its body as text.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response status is not ok.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (!response.ok) throw new Error('Network response was not ok');
  return response.text();
}

// Google search URL for the quoted query, asking for up to 1000 results.
const searchUrl = buildSearchUrl(search);

// Unique stop words; filled once the list has been downloaded.
const stopWords = new Set();

/**
 * Run the whole pipeline and log each extracted phrase to the console.
 *
 * The stop-word list is fetched first, so Google is not queried at all when
 * that download fails.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const bottomPercentile = 0.98;

  for (const word of parseStopWords(await fetchText(stopwordsUrl))) {
    stopWords.add(word);
  }

  const html = await fetchText(searchUrl);
  const htmlDoc = new DOMParser().parseFromString(html, "text/html");
  const titles = Array.from(htmlDoc.querySelectorAll('h3'), (h3) => h3.textContent);

  extractTitleTerms(titles, stopWords, bottomPercentile).forEach((text) => console.log(text));
}

if (typeof fetch === 'undefined' || typeof DOMParser === 'undefined') {
  // Fail fast instead of issuing network requests whose result could not be
  // parsed anyway.
  console.error('Fetch error:', new Error('fetch and DOMParser are required; run this in a browser console'));
} else {
  run().catch((error) => console.error('Fetch error:', error));
}
Comments