// Phrase that will be searched for on Google (sent as an exact, quoted phrase).
const search = "Google * and SEO";
// Query-string form of the phrase: percent-encode reserved characters, then use
// '+' for EVERY space. (String.prototype.replace with a string pattern only
// replaces the first match, which would leave raw spaces in the URL.)
const searchQ = encodeURIComponent(search.trim()).replace(/%20/g, '+');
// Google search URL: the phrase is wrapped in %22 (double quotes) and `num=1000`
// asks for up to 1000 results.
const searchUrl = `https://www.google.com/search?q=%22${searchQ}%22&num=1000`;
// Raw list of English stop words (NLTK's list) hosted as a GitHub gist.
const stopwordsUrl = "https://gist.githubusercontent.com/sebleier/554280/raw/7e0e4a1ce04c2bb7bd41089c9821dbcf6d0c786c/NLTK's%20list%20of%20english%20stopwords";
// Stop words are collected into this Set once the list has been downloaded.
const stopWords = new Set();
// Pipeline: download the stop-word list, then the Google results page, and log
// the distinctive multi-word remainder of every result heading (<h3>).
fetch(stopwordsUrl)
  .then(response => {
    // Treat any non-2xx status as a failure so the chain jumps to .catch().
    if (!response.ok) throw new Error('Network response was not ok');
    return response.text(); // Stop-word list: one word per line
  })
  .then(stopwordsData => {
    // Headings have punctuation stripped further down, so keep every stop word
    // both verbatim and in stripped form ("don't" -> "dont"); otherwise
    // contractions in the list could never match a heading token.
    stopwordsData.split(/\n/).forEach(line => {
      const word = line.trim().toLowerCase();
      if (word === '') return; // Skip blank lines
      stopWords.add(word);
      const stripped = word.replace(/[^\w\s]|_/g, '');
      if (stripped !== '') stopWords.add(stripped);
    });
    return fetch(searchUrl); // Fetch the Google search results next
  })
  .then(response => {
    if (!response.ok) throw new Error('Network response was not ok');
    return response.text(); // Search results HTML
  })
  .then(data => {
    // Parse the returned HTML string into a DOM Document.
    const htmlDoc = new DOMParser().parseFromString(data, "text/html");
    // Words whose count lies above this percentile of all counts are dropped.
    const bottomPercentile = 0.98;

    // One array of tokens per <h3>: lower-cased, punctuation/underscores
    // removed, stop words and empty tokens dropped.
    const processedTexts = Array.from(htmlDoc.querySelectorAll('h3'), h3 =>
      h3.textContent.trim().toLowerCase()
        .replace(/[^\w\s]|_/g, "")
        .split(/\s+/)
        .filter(word => word !== '' && !stopWords.has(word))
    );

    // Count occurrences across all headings. A Map is used instead of a plain
    // object so that tokens such as "constructor" or "tostring" cannot collide
    // with properties inherited from Object.prototype.
    const wordCounts = new Map();
    for (const word of processedTexts.flat()) {
      wordCounts.set(word, (wordCounts.get(word) || 0) + 1);
    }

    // Count found at the chosen percentile of the ascending count list.
    // With no words at all this is undefined, every comparison below is false,
    // and nothing is treated as frequent.
    const sortedCounts = [...wordCounts.values()].sort((a, b) => a - b);
    const thresholdIndex = Math.floor(sortedCounts.length * bottomPercentile);
    const thresholdValue = sortedCounts[thresholdIndex];

    // Words strictly more frequent than the threshold are considered noise.
    const frequentWords = new Set();
    for (const [word, count] of wordCounts) {
      if (count > thresholdValue) frequentWords.add(word);
    }

    // Rebuild each heading without the frequent words, keep only headings that
    // still have more than one word, and de-duplicate via a Set.
    const reconstructedText = new Set(processedTexts
      .map(words => words.filter(word => !frequentWords.has(word)))
      .filter(words => words.length > 1)
      .map(words => words.join(' ')));

    // Log each reconstructed text to the console.
    reconstructedText.forEach(text => console.log(text));
  })
  .catch(error => console.error('Fetch error:', error)); // Any failure in the chain ends up here