Commit 049a97f6 authored by cst
Browse files

added country and city as options

parent 936a2aca
......@@ -12,7 +12,7 @@ using CsvHelper.Configuration;
namespace eea_e1a_get
{
public class WebClientWithTimeout:WebClient
public class WebClientWithTimeout : WebClient
{
protected override WebRequest GetWebRequest(Uri address)
{
......@@ -21,12 +21,12 @@ namespace eea_e1a_get
return wr;
}
}
public static class EEAService
{
private static string EEA_URL = "https://fme.discomap.eea.europa.eu/fmedatastreaming/AirQualityDownload/AQData_Extract.fmw?CountryCode={0}&CityName=&Pollutant={1}&Year_from={2}&Year_to={2}&Station=&Samplingpoint=&Source={3}&Output=TEXT&UpdateDate=";
private static string METADATA_URL = "https://discomap.eea.europa.eu/map/fme/metadata/PanEuropean_metadata.csv";
private static HttpClient Client = new HttpClient();
private static string EEA_URL = "https://fme.discomap.eea.europa.eu/fmedatastreaming/AirQualityDownload/AQData_Extract.fmw?CountryCode={0}&CityName={1}&Pollutant={2}&Year_from={3}&Year_to={3}&Station=&Samplingpoint=&Source={4}&Output=TEXT&UpdateDate=";
private static string METADATA_URL = "https://discomap.eea.europa.eu/map/fme/metadata/PanEuropean_metadata.csv";
private static HttpClient Client = new HttpClient();
// private static Configuration ConfigMeta = new Configuration
// {
// HasHeaderRecord = true,
......@@ -43,47 +43,47 @@ namespace eea_e1a_get
// IgnoreBlankLines = true,
// };
/// <summary>
/// Builds the EEA extract request from <c>EEA_URL</c>, downloads the response as text
/// and returns its non-empty lines.
/// </summary>
/// <param name="country">Value for the <c>CountryCode</c> query parameter; null is treated as empty.</param>
/// <param name="city">Value for the <c>CityName</c> query parameter; null is treated as empty.</param>
/// <param name="source">Value for the <c>Source</c> query parameter; null is treated as empty.</param>
/// <param name="year">Used for both <c>Year_from</c> and <c>Year_to</c>.</param>
/// <param name="pollutantId">Value for the <c>Pollutant</c> query parameter.</param>
/// <returns>
/// The response body split on line breaks, empty entries removed.
/// Each entry is presumably one datasource URL (callers pass them to GetE1A).
/// </returns>
public static string[] GetDatasources(string country, string city, string source, int year, int pollutantId)
{
    // Percent-encode the free-text values so that spaces, '&', '#' or non-ASCII
    // characters (for example in a city name) cannot break or alter the query string.
    string eea_url = String.Format(
        CultureInfo.InvariantCulture,
        EEA_URL,
        Uri.EscapeDataString(country ?? String.Empty),
        Uri.EscapeDataString(city ?? String.Empty),
        pollutantId,
        year,
        Uri.EscapeDataString(source ?? String.Empty));

    // WebClient is disposable; release it as soon as the download has completed.
    using (WebClientWithTimeout httpClient = new WebClientWithTimeout())
    {
        string response = httpClient.DownloadString(eea_url);
        return response.Split(new[] { "\r\n", "\r", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    }
}
/// <summary>
/// Streams the CSV at <paramref name="url"/>, parses it into <c>E1A</c> records and
/// returns only the records whose <c>Validity</c> is greater than zero.
/// </summary>
/// <param name="url">Address of the CSV file to download.</param>
/// <returns>The records with <c>Validity</c> greater than zero; never empty.</returns>
/// <exception cref="InvalidDataException">
/// The file contained no record with <c>Validity</c> greater than zero
/// (this includes a file with no records at all).
/// </exception>
public static async Task<List<E1A>> GetE1A(string url)
{
    using (var stream = await Client.GetStreamAsync(url))
    using (var reader = new StreamReader(stream))
    using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
    {
        csv.Configuration.HasHeaderRecord = true;
        // Null callbacks: presumably disables CsvHelper's header / missing-field
        // errors so that files with differing columns still parse - confirm against CsvHelper docs.
        csv.Configuration.HeaderValidated = null;
        csv.Configuration.MissingFieldFound = null;
        csv.Configuration.IgnoreBlankLines = true;

        /* bool hasSameAllowedResolution = records.Count == records.Count(p=>p.AveragingTime == resolution && Resolution.AllowedResolutions.Contains(p.AveragingTime));
        if(!hasSameAllowedResolution)
        throw new Exception("Dataset was either irregular or did not contain an allowed resolution");
        */

        // The former unused `records.First().AveragingTime` lookup was removed: on an
        // empty file it threw "Sequence contains no elements" instead of the message below.
        var validRecords = csv.GetRecords<E1A>().Where(p => p.Validity > 0).ToList();
        if (validRecords.Count == 0)
        {
            throw new InvalidDataException("Dataset did not contain any valid values");
        }

        return validRecords;
    }
}
public static async Task<List<Metadata>> GetMetadata()
public static async Task<List<Metadata>> GetMetadata()
{
List<Metadata> metadata = new List<Metadata>();
using (var stream = await Client.GetStreamAsync(METADATA_URL))
using (var reader = new StreamReader(stream))
using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
{
{
csv.Configuration.Delimiter = "\t";
while (csv.Read())
{
......@@ -92,10 +92,10 @@ namespace eea_e1a_get
var record = csv.GetRecord<Metadata>();
metadata.Add(record);
}
catch (System.Exception e) {}
catch (System.Exception e) { }
}
}
return metadata;
}
return metadata;
}
}
}
\ No newline at end of file
......@@ -11,209 +11,212 @@ using CsvHelper;
namespace eea_e1a_get
{
public class Program
{
private static HttpClient Client = new HttpClient();
public static async Task Main(string[] args)
public class Program
{
//args: pollutant from_year to_year
int firstYear = args.Length > 1 ? Convert.ToInt32(args[1]) : 2018;
int lastYear = args.Length == 3 ? Convert.ToInt32(args[2]) : firstYear;
string pollutant = args.Length > 1 ? args[0] : "O3";
for (int i = firstYear; i <= lastYear; i++)
{
Console.WriteLine("Processing year " + i);
await Do(i, pollutant);
Console.WriteLine("------------------------");
}
}
private static async Task Do(int year, string pollutant)
{
try
{
string country = "";
string source = year < 2013 ? "Airbase" : "E1a";
Dictionary<string, List<E1A>> data = new Dictionary<string, List<E1A>>();
Console.WriteLine("Downloading metadata...");
List<Metadata> metadata = await EEAService.GetMetadata();
Console.WriteLine("Done");
Console.WriteLine("Downloading datasource links...");
var datasource_urls = EEAService.GetDatasources(country, source, year, GetPollutantId(pollutant));
Console.WriteLine("Done");
int count = 1;
foreach (var url in datasource_urls)
private static HttpClient Client = new HttpClient();
public static async Task Main(string[] args)
{
try
{
var records = await EEAService.GetE1A(url);
// One file can contain multiple resolutions
var resolutions = records.Select(p => p.AveragingTime).Distinct().ToList();
foreach (var resolution in resolutions)
//args: pollutant from_year to_year
//opt: country city
int firstYear = args.Length > 1 ? Convert.ToInt32(args[1]) : 2018;
int lastYear = args.Length > 2 ? Convert.ToInt32(args[2]) : firstYear;
string pollutant = args.Length > 0 ? args[0] : "PM10";
string country = args.Length > 3 ? args[3] : "";
string city = args.Length > 4 ? args[4] : "";
for (int i = firstYear; i <= lastYear; i++)
{
if (!data.ContainsKey(resolution))
data.Add(resolution, new List<E1A>());
data[resolution].AddRange(records.Where(p => p.AveragingTime == resolution));
Console.WriteLine("Processing year " + i);
await Do(i, pollutant, country, city);
Console.WriteLine("------------------------");
}
Console.WriteLine("Downloaded " + count + " of " + datasource_urls.Count() + " datasources");
count++;
}
catch (System.Exception e)
{
using (StreamWriter sw = File.AppendText(source + "-" + year + "-" + pollutant + "-Exception.txt"))
{
sw.WriteLine("Could not parse: " + url);
sw.WriteLine("Exception: " + e.Message);
sw.WriteLine("-----------------------------------------");
}
Console.WriteLine("Could not parse: " + url);
}
}
foreach (var item in data)
private static async Task Do(int year, string pollutant, string country, string city)
{
Resolution resolution = new Resolution(item.Key);
Console.WriteLine("Processing " + resolution.ResolutionAsString);
Console.WriteLine("---");
Console.WriteLine("Grouping timeseries...");
int observationsInAYear = resolution.GetObservationsInAYear(year);
var timeseries =
(from d in item.Value
group d by new { d.SamplingPoint, d.AirQualityStation, d.UnitOfMeasurement } into g
select new Timeserie
{
SamplingPoint = g.Key.SamplingPoint,
AirQualityStation = g.Key.AirQualityStation,
UnitOfMeasurement = g.Key.UnitOfMeasurement,
Resolution = resolution,
Coverage = Math.Round(((g.Count() / Convert.ToDouble(observationsInAYear)) * 100)),
Values = g.ToList()
})
.Where(p => p.Coverage >= 75 && metadata.Any(m => m.SamplingPoint == p.SamplingPoint))
.ToList();
Console.WriteLine("Done");
CSV csv = new CSV();
Console.WriteLine("Adding metadata...");
csv.AddCell("stationid", "samplingpoint", "stationid");
csv.AddCell("areatype", "samplingpoint", "areatype");
csv.AddCell("stationtype", "samplingpoint", "stationtype");
csv.AddCell("latitude", "samplingpoint", "latitude");
csv.AddCell("longitude", "samplingpoint", "longitude");
csv.AddCell("unit", "samplingpoint", "unit");
foreach (var t in timeseries)
{
var m = metadata.Where(p => p.SamplingPoint == t.SamplingPoint).FirstOrDefault();
if (m != null)
try
{
csv.AddCell("stationid", t.SamplingPoint, m.AirQualityStation);
csv.AddCell("stationtype", t.SamplingPoint, m.AirQualityStationType);
csv.AddCell("areatype", t.SamplingPoint, m.AirQualityStationArea);
csv.AddCell("latitude", t.SamplingPoint, m.Latitude);
csv.AddCell("longitude", t.SamplingPoint, m.Longitude);
csv.AddCell("unit", t.SamplingPoint, t.UnitOfMeasurement);
string source = year < 2013 ? "Airbase" : "E1a";
Dictionary<string, List<E1A>> data = new Dictionary<string, List<E1A>>();
Console.WriteLine("Downloading metadata...");
List<Metadata> metadata = await EEAService.GetMetadata();
Console.WriteLine("Done");
Console.WriteLine("Downloading datasource links...");
var datasource_urls = EEAService.GetDatasources(country, city, source, year, GetPollutantId(pollutant));
Console.WriteLine("Done");
int count = 1;
foreach (var url in datasource_urls)
{
try
{
var records = await EEAService.GetE1A(url);
// One file can contain multiple resolutions
var resolutions = records.Select(p => p.AveragingTime).Distinct().ToList();
foreach (var resolution in resolutions)
{
if (!data.ContainsKey(resolution))
data.Add(resolution, new List<E1A>());
data[resolution].AddRange(records.Where(p => p.AveragingTime == resolution));
}
Console.WriteLine("Downloaded " + count + " of " + datasource_urls.Count() + " datasources");
count++;
}
catch (System.Exception e)
{
using (StreamWriter sw = File.AppendText(source + "-" + year + "-" + pollutant + "-Exception.txt"))
{
sw.WriteLine("Could not parse: " + url);
sw.WriteLine("Exception: " + e.Message);
sw.WriteLine("-----------------------------------------");
}
Console.WriteLine("Could not parse: " + url);
}
}
foreach (var item in data)
{
Resolution resolution = new Resolution(item.Key);
Console.WriteLine("Processing " + resolution.ResolutionAsString);
Console.WriteLine("---");
Console.WriteLine("Grouping timeseries...");
int observationsInAYear = resolution.GetObservationsInAYear(year);
var timeseries =
(from d in item.Value
group d by new { d.SamplingPoint, d.AirQualityStation, d.UnitOfMeasurement } into g
select new Timeserie
{
SamplingPoint = g.Key.SamplingPoint,
AirQualityStation = g.Key.AirQualityStation,
UnitOfMeasurement = g.Key.UnitOfMeasurement,
Resolution = resolution,
Coverage = Math.Round(((g.Count() / Convert.ToDouble(observationsInAYear)) * 100)),
Values = g.ToList()
})
.Where(p => p.Coverage >= 75 && metadata.Any(m => m.SamplingPoint == p.SamplingPoint))
.ToList();
Console.WriteLine("Done");
CSV csv = new CSV();
Console.WriteLine("Adding metadata...");
csv.AddCell("stationid", "samplingpoint", "stationid");
csv.AddCell("areatype", "samplingpoint", "areatype");
csv.AddCell("stationtype", "samplingpoint", "stationtype");
csv.AddCell("latitude", "samplingpoint", "latitude");
csv.AddCell("longitude", "samplingpoint", "longitude");
csv.AddCell("unit", "samplingpoint", "unit");
foreach (var t in timeseries)
{
var m = metadata.Where(p => p.SamplingPoint == t.SamplingPoint).FirstOrDefault();
if (m != null)
{
csv.AddCell("stationid", t.SamplingPoint, m.AirQualityStation);
csv.AddCell("stationtype", t.SamplingPoint, m.AirQualityStationType);
csv.AddCell("areatype", t.SamplingPoint, m.AirQualityStationArea);
csv.AddCell("latitude", t.SamplingPoint, m.Latitude);
csv.AddCell("longitude", t.SamplingPoint, m.Longitude);
csv.AddCell("unit", t.SamplingPoint, t.UnitOfMeasurement);
}
else
{
using (StreamWriter sw = File.AppendText(source + "-" + year + "-" + pollutant + "-Exception.txt"))
{
sw.WriteLine("Could not find metadata for " + t.SamplingPoint);
sw.WriteLine("-----------------------------------------");
}
Console.WriteLine("Could not find metadata for " + t.SamplingPoint);
}
}
Console.WriteLine("Done");
Console.WriteLine("Adding dates...");
var defaultValues = timeseries.Select(p => p.SamplingPoint).Prepend("samplingpoint").Distinct().ToDictionary(v => v, v => (object)-990);
DateTime dt = new DateTime(year, 1, 1, 0, 0, 0, DateTimeKind.Utc);
for (int i = 0; i < observationsInAYear; i++)
{
var current = resolution.AddTimeStep(dt, i);
var dateAsString = current.ToString("yyyy-MM-ddTHH:mm:ssZ");
defaultValues["samplingpoint"] = dateAsString;
Dictionary<string, object> cells = new Dictionary<string, object>(defaultValues);
csv.AddRow(current, cells);
}
Console.WriteLine("Done");
Console.WriteLine("Adding values...");
var timevalues = timeseries.SelectMany(p => p.Values).ToList();
foreach (var d in timevalues)
{
if (resolution.ToUniversalTime(d.DatetimeEnd) == new DateTime(2000, 03, 29))
{
string t = "";
}
csv.EditCell(resolution.ToUniversalTime(d.DatetimeEnd), d.SamplingPoint, d.Concentration);
}
Console.WriteLine("Done");
Console.WriteLine("Converting objects...");
var converted = csv.ToObject();
Console.WriteLine("Done");
Console.WriteLine("Writing to csv...");
using (var writer = new StreamWriter(source + "-" + year + "-" + pollutant + "-" + resolution.ResolutionAsString + ".csv"))
using (var c = new CsvWriter(writer, CultureInfo.InvariantCulture))
{
c.WriteRecords(converted);
}
Console.WriteLine("Done");
}
}
else
catch (Exception e)
{
using (StreamWriter sw = File.AppendText(source + "-" + year + "-" + pollutant + "-Exception.txt"))
{
sw.WriteLine("Could not find metadata for " + t.SamplingPoint);
sw.WriteLine("-----------------------------------------");
}
Console.WriteLine("Could not find metadata for " + t.SamplingPoint);
Console.WriteLine("Something unexpected happen");
Console.WriteLine("Exception: " + e.Message);
}
}
Console.WriteLine("Done");
Console.WriteLine("Adding dates...");
var defaultValues = timeseries.Select(p => p.SamplingPoint).Prepend("samplingpoint").Distinct().ToDictionary(v => v, v => (object)-990);
DateTime dt = new DateTime(year, 1, 1, 0, 0, 0, DateTimeKind.Utc);
for (int i = 0; i < observationsInAYear; i++)
{
var current = resolution.AddTimeStep(dt, i);
var dateAsString = current.ToString("yyyy-MM-ddTHH:mm:ssZ");
defaultValues["samplingpoint"] = dateAsString;
Dictionary<string, object> cells = new Dictionary<string, object>(defaultValues);
csv.AddRow(current, cells);
}
Console.WriteLine("Done");
Console.WriteLine("Adding values...");
var timevalues = timeseries.SelectMany(p => p.Values).ToList();
foreach (var d in timevalues)
{
if (resolution.ToUniversalTime(d.DatetimeEnd) == new DateTime(2000, 03, 29))
}
private static dynamic DictionaryToObject(Dictionary<string, object> dict)
{
IDictionary<string, object> eo = new ExpandoObject() as IDictionary<string, object>;
foreach (KeyValuePair<string, object> kvp in dict)
{
string t = "";
eo.Add(kvp);
}
csv.EditCell(resolution.ToUniversalTime(d.DatetimeEnd), d.SamplingPoint, d.Concentration);
}
Console.WriteLine("Done");
Console.WriteLine("Converting objects...");
var converted = csv.ToObject();
Console.WriteLine("Done");
Console.WriteLine("Writing to csv...");
using (var writer = new StreamWriter(source + "-" + year + "-" + pollutant + "-" + resolution.ResolutionAsString + ".csv"))
using (var c = new CsvWriter(writer, CultureInfo.InvariantCulture))
{
c.WriteRecords(converted);
}
Console.WriteLine("Done");
return eo;
}
}
catch (Exception e)
{
Console.WriteLine("Something unexpected happen");
Console.WriteLine("Exception: " + e.Message);
}
}
/// <summary>
/// Copies every key/value pair of <paramref name="dict"/> into a new
/// <see cref="ExpandoObject"/> and returns that object as <c>dynamic</c>.
/// </summary>
/// <param name="dict">Entries to copy; each key becomes a member of the result.</param>
/// <returns>The populated <see cref="ExpandoObject"/>.</returns>
private static dynamic DictionaryToObject(Dictionary<string, object> dict)
{
    ExpandoObject expando = new ExpandoObject();

    // ExpandoObject implements the collection interface explicitly, so go through it to add pairs.
    ICollection<KeyValuePair<string, object>> members = expando;
    foreach (var entry in dict)
    {
        members.Add(entry);
    }

    return expando;
}
private static int GetPollutantId(string pollutant)
{
if (pollutant == "SO2")
return 1;
if (pollutant == "PM2.5")
return 6001;
if (pollutant == "PM10")
return 5;
if (pollutant == "NO2")
return 8;
if (pollutant == "O3")
return 7;
if (pollutant == "NO")
return 38;
return -1;
/// <summary>
/// Translates a pollutant notation (exact, case-sensitive match) into its numeric id.
/// </summary>
/// <param name="pollutant">One of "SO2", "PM2.5", "PM10", "NO2", "O3" or "NO".</param>
/// <returns>The matching id, or -1 when the notation is null or not recognised.</returns>
private static int GetPollutantId(string pollutant)
{
    switch (pollutant)
    {
        case "SO2":
            return 1;
        case "PM10":
            return 5;
        case "O3":
            return 7;
        case "NO2":
            return 8;
        case "NO":
            return 38;
        case "PM2.5":
            return 6001;
        default:
            // Unknown or null notation.
            return -1;
    }
}
}
}
......
......@@ -12,5 +12,5 @@ dotnet publish -c Release -o "dist"
## Run
```
dotnet .\eea-e1a-get.dll O3 2015 2018
dotnet .\eea-e1a-get.dll O3 2015 2018 NO Oslo
```
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment