Skip to content
Snippets Groups Projects
Commit fcac8d41 authored by Riccardo Boero's avatar Riccardo Boero :innocent:
Browse files

Added download checks to avoid redownloads of files.

parent e5e84c53
No related branches found
No related tags found
No related merge requests found
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
......@@ -7,15 +8,19 @@ from concurrent.futures import ThreadPoolExecutor
# Values of the 'Location' column for which process_file downloads data;
# rows with any other location are skipped.
interested_locations = [
    'Austria',
    'Belgium',
    'Bulgaria',
    'Croatia',
    'Cyprus',
    'CzechRepublic',
    'Denmark',
    'Estonia',
    'Finland',
    'France',
    'Germany',
    'Greece',
    'Hungary',
    'Ireland',
    'Italy',
    'Latvia',
    'Lithuania',
    'Luxembourg',
    'Malta',
    'Netherlands',
    'Poland',
    'Portugal',
    'Romania',
    'Slovakia',
    'Slovenia',
    'Spain',
    'Sweden',
    'Iceland',
    'Switzerland',
    'Liechtenstein',
    'Norway',
    'UnitedKingdom',
    'Ukraine',
    'UnitedStates',
]
def process_file(row):
    """Download one dataset row and save it as a GeoJSON file under ``data/``.

    Rows whose ``Location`` is not listed in ``interested_locations`` are
    ignored, and so are rows whose output file already exists, so running
    the script again does not download the same file a second time.

    Args:
        row: Mapping with at least the keys ``'Location'``, ``'QuadKey'``
            and ``'Url'``.

    Returns:
        None. The only effect is the file written to
        ``data/<QuadKey>.geojson``.
    """
    file_path = f"data/{row['QuadKey']}.geojson"

    if row['Location'] not in interested_locations:
        return

    # NOTE(review): only the presence of the file is checked, so a file left
    # incomplete by an interrupted run would also be skipped -- confirm
    # whether that matters for this pipeline.
    if os.path.exists(file_path):
        return

    # The source is read as line-delimited JSON (one record per line).
    df = pd.read_json(row['Url'], lines=True)

    # Turn each geometry mapping into a shapely geometry object so that
    # GeoDataFrame can use the column as its geometry.
    df['geometry'] = df['geometry'].apply(shape)

    gdf = gpd.GeoDataFrame(df, crs=4326)
    gdf.to_file(file_path, driver="GeoJSON")
def main():
    """Fetch the dataset index and process every listed file in parallel.

    Reads the dataset-links CSV, makes sure the ``data`` output directory
    exists, and passes each CSV row (as a dict) to ``process_file`` on a
    pool of 20 worker threads. A row that fails is reported on standard
    output and does not stop the remaining rows from being processed.
    """
    dataset_links = pd.read_csv("https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv")

    # exist_ok makes this a single call with no check-then-create gap.
    os.makedirs('data', exist_ok=True)

    rows = dataset_links.to_dict(orient='records')

    # Use ThreadPoolExecutor to process files in parallel
    with ThreadPoolExecutor(max_workers=20) as executor:
        submitted = [(row, executor.submit(process_file, row)) for row in rows]

        # Inspect every future: an exception raised inside a worker is
        # otherwise stored on the future and never surfaces.
        for row, future in submitted:
            error = future.exception()
            if error is not None:
                print(f"Failed to process {row.get('QuadKey')}: {error!r}")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment