Download Max CPTEC

PHOTO EMBED

Tue Sep 05 2023 20:03:21 GMT+0000 (Coordinated Universal Time)

Saved by @leandrorbc #python

import os
import requests
from bs4 import BeautifulSoup
import urllib.request
from urllib.parse import urljoin

base_url = 'http://ftp.cptec.inpe.br/'

baseDir = 'modelos/tempo/SAMeT/DAILY/TMAX/'

outputDir = '/Users/agroclimate/Documents/PriceFrobes/Uruguay/Sura/CPTEC/Faltantes/'

years = list(range(2022, 2023))
months = list(range(1,4))+list(range(10, 13))
#months = list(range(10, 13))
for year in years:

    for month in months:
        if month < 10:
            ftp_path = base_url + baseDir + str(year) + '/0' + str(month) + '/'
        elif month >= 10:
            ftp_path = base_url + baseDir + str(year) + '/' + str(month) + '/'
        
        # List folder content
        response = requests.get(ftp_path)
        
        soup = BeautifulSoup(response.content, 'html.parser')
        # Find all the 'a' tags (hyperlinks) on the page
        links = soup.find_all('a')

        # Extract filenames from the href attribute of 'a' tags
        files = [link['href'] for link in links if link.has_attr('href')]

        # Filter out directories and other unwanted links
        files = [filename for filename in files if filename.endswith('.nc')]
    
        for file in files:
            try:
                file_url = urljoin(ftp_path, file)

                to_download = (outputDir + file)

                # Download the file
                urllib.request.urlretrieve(file_url, to_download)

                print(f"Downloaded: {file}")

            except:
                print("Error")
content_copyCOPY