
Source code for torchgeo.datasets.openbuildings

# Copyright (c) TorchGeo Contributors. All rights reserved.
# Licensed under the MIT License.

"""Open Buildings datasets."""

import glob
import json
import os
from collections.abc import Callable, Iterable
from typing import Any, ClassVar

import fiona
import fiona.transform
import matplotlib.pyplot as plt
import pandas as pd
import rasterio
import rasterio.features
import rasterio.transform
import shapely
import shapely.wkt as wkt
import torch
from geopandas import GeoDataFrame
from matplotlib.figure import Figure
from pyproj import CRS

from .errors import DatasetNotFoundError
from .geo import VectorDataset
from .utils import GeoSlice, Path, check_integrity


class OpenBuildings(VectorDataset):
    r"""Open Buildings dataset.

    The `Open Buildings <https://sites.research.google/open-buildings/>`__ dataset
    consists of computer generated building detections across the African continent.

    Dataset features:

    * 516M building detections as polygons with centroid lat/long
    * covering area of 19.4M km\ :sup:`2`\ (64% of the African continent)
    * confidence score and `Plus Code <https://maps.google.com/pluscodes/>`_

    Dataset format:

    * csv files containing building detections compressed as csv.gz
    * meta data geojson file

    The data can be downloaded from `here
    <https://sites.research.google/open-buildings/#open-buildings-download>`__.
    Additionally, the `meta data geometry file
    <https://openbuildings-public-dot-gweb-research.uw.r.appspot.com/public/tiles.geojson>`_
    also needs to be placed in `root` as `tiles.geojson`.

    If you use this dataset in your research, please cite the following technical
    report:

    * https://arxiv.org/abs/2107.12283

    .. versionadded:: 0.3
    """

    md5s: ClassVar[dict[str, str]] = {
        '025_buildings.csv.gz': '41db2572bfd08628d01475a2ee1a2f17',
        '04f_buildings.csv.gz': '3232c1c6d45c1543260b77e5689fc8b1',
        '05b_buildings.csv.gz': '4fc57c63bbbf9a21a3902da7adc3a670',
        '093_buildings.csv.gz': '00fce146dadf0b30255e750c4c5ac2de',
        '095_buildings.csv.gz': 'f5765b0936f7ccbd0b4abed60d994f08',
        '0c3_buildings.csv.gz': '013b130fe872387e0cff842399b423de',
        '0c3_buildings.csv': 'a697ad2433e9a9f6001de25b4664651a',
        '0c5_buildings.csv.gz': '16ca283e9344e9da8b47acaf03c1c6e4',
        '0c7_buildings.csv.gz': 'b3774930006497a80c8a2fbf33056610',
        '0d1_buildings.csv.gz': '41e652218ca5964d297d9cd1d84b831c',
        '0d7_buildings.csv.gz': 'd365fe47d10b0756dd54ceca24598d8e',
        '0d9_buildings.csv.gz': '3ebd47fa4f86857266e9a7346d6aa163',
        '0db_buildings.csv.gz': '368213e9caa7ee229ef9403b0ca8c80d',
        '0dd_buildings.csv.gz': '8f5fcefff262fdfd82800092d2e9d841',
        '0df_buildings.csv.gz': 'cbb5f63b10daa25568bdde8d9f66f8a4',
        '0e1_buildings.csv.gz': 'a9b9bf1e541b62c8a34d2f6f2ae71e1c',
        '0e3_buildings.csv.gz': '3d9c2ffc11c02aec2bd008699f9c4bd1',
        '0e5_buildings.csv.gz': '1e1b2bf63dfc520e62e4b68db23fe64c',
        '0e7_buildings.csv.gz': 'c96797588c90e66268367cb56b4b9af8',
        '0e9_buildings.csv.gz': 'c53bb7bbc8140034d1be2c49ff49af68',
        '0eb_buildings.csv.gz': '407c771f614a15d69d78f1e25decf694',
        '0ed_buildings.csv.gz': 'bddd10992d291677019d7106ce1f4fac',
        '0ef_buildings.csv.gz': 'd1b91936e7ac06c661878ef9eb5dba7b',
        '0f1_buildings.csv.gz': '9d86eb10d2d8766e1385b6c52c11d5e2',
        '0f9_buildings.csv.gz': '1c6775131214b26f4a27b4c42d6e9fca',
        '0fb_buildings.csv.gz': 'd39528cb4e0cbff589ca89dc86d9b5db',
        '0fd_buildings.csv.gz': '304fe4a60e950c900697d975098f7536',
        '0ff_buildings.csv.gz': '266ca7ed1ad0251b3999b0e2e9b54648',
        '103_buildings.csv.gz': '8d3cafab5f1e02b2a0a6180eb34d1cac',
        '105_buildings.csv.gz': 'dd61cc74239aa9a1b30f10859122807b',
        '107_buildings.csv.gz': '823c05984f859a1bf17af8ce78bf2892',
        '109_buildings.csv.gz': 'cfdee0e807168cd1c183d9c01535369b',
        '10b_buildings.csv.gz': 'd8ecaf406abd864b641ba34985f3042e',
        '10d_buildings.csv.gz': 'af584a542a17942ff7e94653322dba87',
        '10f_buildings.csv.gz': '3d5369e15c4d1f59fb38cf61f4e6290b',
        '111_buildings.csv.gz': '47504e43d1b67101bed5d924225328dc',
        '113_buildings.csv.gz': '3f991c831569f91f34eaa8fc3882b2fd',
        '117_buildings.csv.gz': 'a4145fa6e458480e30c807f80ae5cd65',
        '119_buildings.csv.gz': '5661b7ac23f266542c7e0d962a8cae58',
        '11b_buildings.csv.gz': '41b6d036610d0bddac069ec72e68710e',
        '11d_buildings.csv.gz': '1ef75e9d176dd8d6bfa6012d36b1d25c',
        '11f_buildings.csv.gz': 'f004873d1ef3933c1716ab6409565b7d',
        '121_buildings.csv.gz': '0c7e7a9043ed069fbdefdcfcfc437482',
        '123_buildings.csv.gz': 'c46bd53b67025c3de11657220cce0aec',
        '125_buildings.csv.gz': '33253ae1a82656f4eedca9bd86f981a3',
        '127_buildings.csv.gz': '2f827f8fc93485572178e9ad0c65e22d',
        '129_buildings.csv.gz': '74f98346990a1d1e41241ce8f4bb201a',
        '12f_buildings.csv.gz': 'b1b0777296df2bfef512df0945ca3e14',
        '131_buildings.csv.gz': '8362825b10c9396ecbb85c49cd210bc6',
        '137_buildings.csv.gz': '96da7389df820405b0010db4a6c98c61',
        '139_buildings.csv.gz': 'c41e26fc6f3565c3d7c66ab977dc8159',
        '13b_buildings.csv.gz': '981d4ccb0f41a103bdad8ef949eb4ffe',
        '13d_buildings.csv.gz': 'd15585d06ee74b0095842dd887197035',
        '141_buildings.csv.gz': 'ae0bf17778d45119c74e50e06a04020d',
        '143_buildings.csv.gz': '9699809e57eb097dfaf9d484f1d9c5fa',
        '145_buildings.csv.gz': '81e74e0165ea358278ce18507dddfdb0',
        '147_buildings.csv.gz': '39edad15fa16c432f5d460f0a8166032',
        '149_buildings.csv.gz': '94bf8f8fa221744fb1d57c7d4065e69e',
        '14f_buildings.csv.gz': 'ca8410be89b5cf868c2a67861712e4ea',
        '15b_buildings.csv.gz': '8c0071c0ae20a60e8dd4d7aa6aac5a99',
        '15d_buildings.csv.gz': '35f044a323556adda5f31e8fc9307c85',
        '161_buildings.csv.gz': 'ba08b70a26f07b5e2cd4eafd9d6f826b',
        '163_buildings.csv.gz': '2bec83a2504b531cd1cb0311fcb6c952',
        '165_buildings.csv.gz': '48f934733dd3054164f9b09abee63312',
        '167_buildings.csv.gz': 'bba8657024d80d44e475759b65adc969',
        '169_buildings.csv.gz': '13e142e48597ee7a8b0b812e226dfa72',
        '16b_buildings.csv.gz': '9c62351d6cc8eaf761ab89d4586d26d6',
        '16d_buildings.csv.gz': 'a33c23da3f603c8c3eacc5e6a47aaf66',
        '16f_buildings.csv.gz': '4850dd7c8f0fb628ba5864ea9f47647b',
        '171_buildings.csv.gz': '4217f1b025db869c8bed1014704c2a79',
        '173_buildings.csv.gz': '5a5f3f07e261a9dc58c6180b69130e4a',
        '175_buildings.csv.gz': '5bbf7a7c8f57d28e024ddf8f4039b575',
        '177_buildings.csv.gz': '76cd4b17d68d62e1f088f229b65f8acf',
        '179_buildings.csv.gz': 'a5a1c6609483336ddff91b2385e70eb9',
        '17b_buildings.csv.gz': 'a47c1145a3b0bcdaba18c153b7b92b87',
        '17d_buildings.csv.gz': '3226d0abf396f44c1a436be83538dfd8',
        '17f_buildings.csv.gz': '3e18d4fc5837ee89274d30f2126b92b2',
        '181_buildings.csv.gz': 'c87639d7f6d6a85a3fa6b06910b0e145',
        '183_buildings.csv.gz': 'e94438ebf19b3b25035954d23a0e90cf',
        '185_buildings.csv.gz': '8de8d1d50c16c575f85b96dee474cb56',
        '189_buildings.csv.gz': 'da94cd495a99496fd687bbb4a1715c90',
        '18b_buildings.csv.gz': '9ab353335fe6ff694e834889be2b305d',
        '18d_buildings.csv.gz': 'e37e0f868ce96f7d14f7bf1a301da1d3',
        '18f_buildings.csv.gz': 'e9000b9ef9bb0f838088e96becfc95a1',
        '191_buildings.csv.gz': 'c00bb4d6b2b12615d576c06fe545cbfa',
        '193_buildings.csv.gz': 'd48d4c03ef053f6987b3e6e9e78a8b03',
        '195_buildings.csv.gz': 'd93ab833e74480f07a5ccf227067db5a',
        '197_buildings.csv.gz': '8667e040f9863e43924aafe6071fabc7',
        '199_buildings.csv.gz': '04ba65a4caf16cc1e0d5c4e1322c5885',
        '19b_buildings.csv.gz': 'e49412e3e1bccceb0bdb4df5201288f4',
        '19d_buildings.csv.gz': '92b5fb4e96529d90e99c788e3e8696d4',
        '19f_buildings.csv.gz': 'c023f6c37d0026b56f530b841517a6cd',
        '1a1_buildings.csv.gz': '471483b50c722af104af8a582e780c04',
        '1a3_buildings.csv.gz': '0a453053f1ff53f9e165e16c7f97354a',
        '1a5_buildings.csv.gz': '1f6a823e223d5f29c66aa728933de684',
        '1a7_buildings.csv.gz': '6130b724501fa16e6d84e484c4091f1f',
        '1a9_buildings.csv.gz': '73022e8e7b994e76a58cc763a057d542',
        '1b9_buildings.csv.gz': '48dea4af9d12b755e75b76c68c47de6b',
        '1bb_buildings.csv.gz': 'dfb9ee4d3843d81722b70f7582c775a4',
        '1bd_buildings.csv.gz': 'fdea2898fc50ae25b6196048373d8244',
        '1bf_buildings.csv.gz': '96ef27d6128d0bcdfa896fed6f27cdd0',
        '1c1_buildings.csv.gz': '32e3667d939e7f95316eb75a6ffdb603',
        '1c3_buildings.csv.gz': 'ed94b543da1bbe3101ed66f7d7727d24',
        '1c5_buildings.csv.gz': 'ce527ab33e564f0cc1b63ae467932a18',
        '1c7_buildings.csv.gz': 'd5fb474466d6a11d3b08e3a011984ada',
        '1dd_buildings.csv.gz': '9e7e50e3f95b3f2ceff6351b75ca1e75',
        '1e5_buildings.csv.gz': 'f95ea85fce47ce7edf5729086d43f922',
        '1e7_buildings.csv.gz': '2bca5682c48134e69b738d70dfe7d516',
        '1e9_buildings.csv.gz': 'f049ad06dbbb200f524b4f50d1df8c2e',
        '1eb_buildings.csv.gz': '6822d7f202b453ec3cc03fb8f04691ad',
        '1ed_buildings.csv.gz': '9dfc560e2c3d135ebdcd46fa09c47169',
        '1ef_buildings.csv.gz': '506e7772c35b09cfd3b6f8691dc2947d',
        '1f1_buildings.csv.gz': 'b74f2b585cfad3b881fe4f124080440a',
        '1f3_buildings.csv.gz': '12896642315320e11ed9ed2d3f0e5995',
        '1f5_buildings.csv.gz': '334aea21e532e178bf5c54d028158906',
        '1f7_buildings.csv.gz': '0e8c3d2e005eb04c6852a8aa993f5a76',
        '217_buildings.csv.gz': '296e9ba121fea752b865a48e5c0fe8a5',
        '219_buildings.csv.gz': '1d19b6626d738f7706f75c2935aaaff4',
        '21d_buildings.csv.gz': '28bfca1f8668f59db021d3a195994768',
        '21f_buildings.csv.gz': '06325c8b0a8f6ed598b7dc6f0bb5adf2',
        '221_buildings.csv.gz': 'a354ffc1f7226d525c7cf53848975da1',
        '223_buildings.csv.gz': '3bda1339d561b3bc749220877f1384d9',
        '225_buildings.csv.gz': '8eb02ad77919d9e551138a14d3ad1bbc',
        '227_buildings.csv.gz': 'c07aceb7c81f83a653810befa0695b61',
        '22f_buildings.csv.gz': '97d63e30e008ec4424f6b0641b75377c',
        '231_buildings.csv.gz': 'f4bc384ed74552ddcfe2e69107b91345',
        '233_buildings.csv.gz': '081756e7bdcfdc2aee9114c4cfe62bd8',
        '23b_buildings.csv.gz': '75776d3dcbc90cf3a596664747880134',
        '23d_buildings.csv.gz': 'e5d0b9b7b14601f58cfdb9ea170e9520',
        '23f_buildings.csv.gz': '77f38466419b4d391be8e4f05207fdf5',
        '3d1_buildings.csv.gz': '6659c97bd765250b0dee4b1b7ff583a9',
        '3d5_buildings.csv.gz': 'c27d8f6b2808549606f00bc04d8b42bc',
        '3d7_buildings.csv.gz': 'abdef2e68cc31c67dbb6e60c4c40483e',
        '3d9_buildings.csv.gz': '4c06ae37d8e76626345a52a32f989de9',
        '3db_buildings.csv.gz': 'e83ca0115eaf4ec0a72aaf932b00442a',
        'b5b_buildings.csv.gz': '5e5f59cb17b81137d89c4bab8107e837',
    }

    filename_glob = '*_buildings.csv'
    zipfile_glob = '*_buildings.csv.gz'

    meta_data_url = 'https://sites.research.google/open-buildings/tiles.geojson'
    meta_data_filename = 'tiles.geojson'

    def __init__(
        self,
        paths: Path | Iterable[Path] = 'data',
        crs: CRS | None = None,
        res: float | tuple[float, float] = 0.0001,
        transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            paths: one or more root directories to search or files to load
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS in (xres, yres) format.
                If a single float is provided, it is used for both the x and y
                resolution.
            transforms: a function/transform that takes input sample and its target as
                entry and returns a transformed version
            checksum: if True, check the MD5 of the downloaded files (may be slow)

        Raises:
            DatasetNotFoundError: If dataset is not found.

        .. versionchanged:: 0.5
           *root* was renamed to *paths*.
        """
        self.paths = paths
        if isinstance(res, int | float):
            res = (res, res)
        self.res = res
        self.checksum = checksum
        self.transforms = transforms

        self._verify()

        assert isinstance(self.paths, str | os.PathLike)
        with open(os.path.join(self.paths, 'tiles.geojson')) as f:
            data = json.load(f)

        features = data['features']
        features_filenames = [
            feature['properties']['tile_url'].split('/')[-1] for feature in features
        ]  # get csv filename

        polygon_files = glob.glob(os.path.join(self.paths, self.zipfile_glob))
        polygon_filenames = [f.split(os.sep)[-1] for f in polygon_files]

        matched_features = [
            feature
            for filename, feature in zip(features_filenames, features)
            if filename in polygon_filenames
        ]

        filepaths = []
        datetimes = []
        geometries = []
        source_crs = CRS.from_epsg(4326)
        for feature in matched_features:
            if crs is None:
                crs = source_crs

            filepath = os.path.join(
                self.paths, feature['properties']['tile_url'].split('/')[-1]
            )
            mint = pd.Timestamp.min
            maxt = pd.Timestamp.max

            c = feature['geometry']['coordinates'][0]
            xs = [x[0] for x in c]
            ys = [x[1] for x in c]
            minx, miny, maxx, maxy = min(xs), min(ys), max(xs), max(ys)
            (minx, maxx), (miny, maxy) = fiona.transform.transform(
                source_crs.to_wkt(), crs.to_wkt(), [minx, maxx], [miny, maxy]
            )

            filepaths.append(filepath)
            datetimes.append((mint, maxt))
            geometries.append(shapely.box(minx, miny, maxx, maxy))

        if not len(filepaths):
            raise DatasetNotFoundError(self)

        data = {'filepath': filepaths}
        index = pd.IntervalIndex.from_tuples(datetimes, closed='both', name='datetime')
        self.index = GeoDataFrame(data, index=index, geometry=geometries, crs=crs)

        self._source_crs = source_crs

    def __getitem__(self, query: GeoSlice) -> dict[str, Any]:
        """Retrieve input, target, and/or metadata indexed by spatiotemporal slice.

        Args:
            query: [xmin:xmax:xres, ymin:ymax:yres, tmin:tmax:tres] coordinates
                to index.

        Returns:
            Sample of input, target, and/or metadata at that index.

        Raises:
            IndexError: If *query* is not found in the index.
        """
        x, y, t = self._disambiguate_slice(query)
        interval = pd.Interval(t.start, t.stop)
        index = self.index.iloc[self.index.index.overlaps(interval)]
        index = index.iloc[:: t.step]
        index = index.cx[x.start : x.stop, y.start : y.stop]

        if index.empty:
            raise IndexError(
                f'query: {query} not found in index with bounds: {self.bounds}'
            )

        shapes = self._filter_geometries(query, index.filepath)

        # Rasterize geometries
        width = (x.stop - x.start) / x.step
        height = (y.stop - y.start) / y.step
        transform = rasterio.transform.from_bounds(
            x.start, y.start, x.stop, y.stop, width, height
        )
        if shapes:
            masks = rasterio.features.rasterize(
                shapes, out_shape=(round(height), round(width)), transform=transform
            )
            masks = torch.tensor(masks).unsqueeze(0)
        else:
            masks = torch.zeros(size=(1, round(height), round(width)))

        sample = {'mask': masks, 'crs': self.crs, 'bounds': query}

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def _filter_geometries(
        self, query: GeoSlice, filepaths: list[str]
    ) -> list[dict[str, Any]]:
        """Filter the building polygons read from the csv files by the query extent.

        Args:
            query: [xmin:xmax:xres, ymin:ymax:yres, tmin:tmax:tres] coordinates
                to index.
            filepaths: filepaths of the csv files that were hits in the spatial index

        Returns:
            list of all polygons from all hit filepaths
        """
        x, y, t = self._disambiguate_slice(query)

        # We need to know the bounding box of the query in the source CRS
        (minx, maxx), (miny, maxy) = fiona.transform.transform(
            self.crs.to_wkt(),
            self._source_crs.to_wkt(),
            [x.start, x.stop],
            [y.start, y.stop],
        )

        df_query = (
            f'longitude >= {minx} & longitude <= {maxx} & '
            f'latitude >= {miny} & latitude <= {maxy}'
        )
        shapes = []
        for f in filepaths:
            csv_chunks = pd.read_csv(f, chunksize=200000, compression='gzip')
            for chunk in csv_chunks:
                df = chunk.query(df_query)
                # Warp geometries to requested CRS
                polygon_series = df['geometry'].map(self._wkt_fiona_geom_transform)
                shapes.extend(polygon_series.values.tolist())

        return shapes

    def _wkt_fiona_geom_transform(self, x: str) -> dict[str, Any]:
        """Transform a WKT geometry string into the dataset CRS.

        Args:
            x: WKT polygon string

        Returns:
            the transformed geometry in GeoJSON format
        """
        x = json.dumps(shapely.geometry.mapping(wkt.loads(x)))
        x = json.loads(x.replace("'", '"'))

        import fiona

        if hasattr(fiona, 'model'):
            import fiona.model

            geom = fiona.model.Geometry(**x)
        else:
            geom = x

        transformed: dict[str, Any] = fiona.transform.transform_geom(
            self._source_crs.to_wkt(), self.crs.to_wkt(), geom
        )
        return transformed

    def _verify(self) -> None:
        """Verify the integrity of the dataset."""
        # Check if the compressed csv files have already been downloaded and,
        # optionally, verify their checksums
        assert isinstance(self.paths, str | os.PathLike)
        pathname = os.path.join(self.paths, self.zipfile_glob)
        i = 0
        for zipfile in glob.iglob(pathname):
            filename = os.path.basename(zipfile)
            if self.checksum and not check_integrity(zipfile, self.md5s[filename]):
                raise RuntimeError(f'Dataset found, but corrupted: {filename}.')
            i += 1

        if i != 0:
            return

        raise DatasetNotFoundError(self)

    def plot(
        self,
        sample: dict[str, Any],
        show_titles: bool = True,
        suptitle: str | None = None,
    ) -> Figure:
        """Plot a sample from the dataset.

        Args:
            sample: a sample returned by :meth:`__getitem__`
            show_titles: flag indicating whether to show titles above each panel
            suptitle: optional string to use as a suptitle

        Returns:
            a matplotlib Figure with the rendered sample
        """
        mask = sample['mask'].permute(1, 2, 0)

        showing_predictions = 'prediction' in sample
        if showing_predictions:
            pred = sample['prediction'].permute(1, 2, 0)
            ncols = 2
        else:
            ncols = 1

        fig, axs = plt.subplots(nrows=1, ncols=ncols, figsize=(ncols * 4, 4))

        if showing_predictions:
            axs[0].imshow(mask)
            axs[0].axis('off')
            axs[1].imshow(pred)
            axs[1].axis('off')
            if show_titles:
                axs[0].set_title('Mask')
                axs[1].set_title('Prediction')
        else:
            axs.imshow(mask)
            axs.axis('off')
            if show_titles:
                axs.set_title('Mask')

        if suptitle is not None:
            plt.suptitle(suptitle)

        return fig
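
The listing above is the dataset implementation itself. For orientation, the following is a minimal usage sketch and not part of the torchgeo source: it assumes the compressed `*_buildings.csv.gz` tiles and `tiles.geojson` have already been downloaded into a local `data/` directory, and the longitude/latitude window and output filename are purely illustrative.

# Minimal usage sketch (assumptions: `data/` already holds the downloaded
# *_buildings.csv.gz tiles plus tiles.geojson; the bounds below are illustrative).
from torchgeo.datasets import OpenBuildings

ds = OpenBuildings(paths='data', res=0.0001, checksum=False)

# Query with a spatiotemporal slice in the dataset CRS (EPSG:4326 unless another
# CRS was requested): [xmin:xmax, ymin:ymax]; the time slice can be omitted.
sample = ds[30.0:30.1, -1.6:-1.5]
print(sample['mask'].shape)  # roughly torch.Size([1, 1000, 1000]) at res=0.0001

# Render the rasterized building mask
fig = ds.plot(sample, suptitle='Open Buildings')
fig.savefig('open_buildings_sample.png')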
