scraper.py - yamanefkar/Turk-Sploit - Sourcegraph

PHOTO EMBED

Fri Jan 19 2024 10:28:24 GMT+0000 (Coordinated Universal Time)

Saved by @Illmatickid

# Date: 12/28/2018
# Author: Mohamed
# Description: Proxy scraper 

from time import sleep
from requests import get 
from .proxy import Proxy 
from .display import Display
from .proxy_list import ProxyList
from bs4 import BeautifulSoup as bs 
from threading import Thread, RLock


class Scraper(object):
    """Scrape proxy entries from a few public proxy-listing pages.

    Three HTML listing pages (``links``) and one plain-text list
    (``extra_proxies_link``) are fetched concurrently, one thread per
    source. Every usable entry is stored in ``scraped_proxies`` as a dict
    with the keys ``'ip'``, ``'port'`` and ``'country'``.

    Setting ``is_alive`` to ``False`` makes the ``proxies`` property stop
    waiting for its worker threads and silences failure warnings.
    """

    # Seconds to wait on a listing site before giving up on it. Without a
    # timeout a stalled connection keeps its worker thread alive forever,
    # and the polling loop in ``proxies`` would never finish.
    request_timeout = 10

    def __init__(self):
        # Guards ``scraped_proxies`` and the display, which are shared by
        # all worker threads.
        self.lock = RLock()
        self.is_alive = True
        self.display = Display()
        self.scraped_proxies = []
        self.extra_proxies_link = 'http://spys.me/proxy.txt'

        self.links = [
            'https://sslproxies.org',
            'https://free-proxy-list.net',
            'https://free-proxy-list.net/anonymous-proxy.html'
        ]

    def parse_extra_proxy(self, proxy):
        """Parse one line of the plain-text list at ``extra_proxies_link``.

        The line is expected to look like ``"<ip>:<port> <country>-..."``:
        space-separated fields, the first being ``ip:port`` and the second
        starting with the country code followed by ``-``.

        Returns a dict with ``'ip'``, ``'port'`` and ``'country'``, or
        ``None`` when the line does not have that shape (instead of
        raising ``IndexError`` and killing the worker thread).
        """
        fields = proxy.split(' ')
        if len(fields) < 2:
            return None

        addr = fields[0].split(':')
        if len(addr) < 2:
            return None

        return {
            'ip': addr[0],
            'port': addr[1],
            'country': fields[1].split('-')[0]
        }

    def parse_proxy(self, proxy):
        """Parse one ``<tr>`` element of an HTML proxy table.

        ``proxy`` must offer ``find_all('td')`` returning cells that have
        a ``.string`` attribute. Cells 0, 1 and 3 are used as ip, port and
        country; a row is rejected when cell 4 or cell 5 reads
        ``'transparent'``.

        Returns the proxy dict, or ``None`` for rejected rows and for rows
        with fewer than six cells (which previously raised ``IndexError``).
        """
        cells = proxy.find_all('td')
        if len(cells) < 6:
            return None

        if cells[4].string == 'transparent' or cells[5].string == 'transparent':
            return None

        return {
            'ip': cells[0].string,
            'port': cells[1].string,
            'country': cells[3].string,
        }

    def scrape_proxies(self, link):
        """Fetch ``link`` and store every usable row of its first ``<tbody>``.

        Runs in a worker thread. Any failure (network error, timeout, page
        without a ``<tbody>``) is reported through ``display.warning``
        rather than raised, so one bad source cannot break the others.
        """
        rows = []

        try:
            page = get(link, timeout=self.request_timeout).text
            rows = bs(page, 'html.parser').find('tbody').find_all('tr')
        except Exception:
            # Deliberately best-effort: the empty ``rows`` triggers the
            # warning below. ``Exception`` (not a bare except) so that
            # SystemExit/KeyboardInterrupt are not swallowed.
            pass

        if not rows:
            with self.lock:
                if self.is_alive:
                    self.display.warning('Failed to grab proxies from {}'.format(link))
            return

        # Parse outside the lock; only the shared list update needs it.
        parsed = [entry for entry in map(self.parse_proxy, rows) if entry]
        with self.lock:
            self.scraped_proxies.extend(parsed)

    def scrape_extra_proxies(self):
        """Fetch the plain-text list and store its ``-H`` / ``-S`` entries.

        Runs in a worker thread. Only lines containing both ``'-H'`` and
        ``'-S'`` are kept; lines that cannot be parsed are skipped.
        Failures are reported through ``display.warning``, never raised.
        """
        lines = []

        try:
            if self.is_alive:
                response = get(self.extra_proxies_link, timeout=self.request_timeout)
                # splitlines() copes with CRLF and yields [] for an empty
                # body, so an empty response is reported as a failure.
                lines = response.text.splitlines()
        except Exception:
            # Best-effort, see scrape_proxies.
            pass

        if not lines:
            with self.lock:
                if self.is_alive:
                    self.display.warning('Failed to grab proxies from {}'.format(self.extra_proxies_link))
            return

        parsed = []
        for line in lines:
            if '-H' in line and '-S' in line:
                entry = self.parse_extra_proxy(line)
                if entry:
                    parsed.append(entry)

        with self.lock:
            self.scraped_proxies.extend(parsed)

    @property
    def proxies(self):
        """Scrape all sources and return the collected proxies.

        Starts one daemon thread per source, then polls every 0.5 s until
        all of them have finished or ``is_alive`` becomes ``False``. If
        the scraper is still alive, each scraped dict not already in the
        ``ProxyList`` is wrapped in ``Proxy`` and appended.

        Returns ``proxy_list.list`` (empty of new entries when the scraper
        was stopped while waiting).
        """
        proxy_list = ProxyList()

        threads = [Thread(target=self.scrape_proxies, args=[link]) for link in self.links]
        threads.append(Thread(target=self.scrape_extra_proxies))

        for thread in threads:
            # Daemon threads never keep the interpreter alive on exit.
            thread.daemon = True
            thread.start()

        while self.is_alive and threads:
            threads = [thread for thread in threads if thread.is_alive()]
            if threads:
                sleep(0.5)

        if self.is_alive:
            for proxy in self.scraped_proxies:
                if proxy not in proxy_list:
                    proxy_list.append(Proxy(proxy))

        return proxy_list.list
content_copyCOPY

https://sourcegraph.com/github.com/yamanefkar/Turk-Sploit@677410ba6085391da47092d79c3acd018f9f7d02/-/blob/Site/Instagram/Instagram-bruteforce/Executable/lib/scraper.py