parent
86f4cafca5
commit
f13b7d861a
10 changed files with 556 additions and 204 deletions
@ -1,3 +1,5 @@ |
||||
experiments |
||||
d3.min.js |
||||
node_modules |
||||
data/meteo/txt |
||||
data/stations/xml |
||||
|
@ -1,47 +0,0 @@ |
||||
var stations = require('./server/stations.js'); |
||||
var meteo = require('./server/meteorological.js'); |
||||
var Promise = require('es6-promise').Promise; |
||||
var chalk = require('chalk'); |
||||
|
||||
//===== Weather data scraper
|
||||
// var date = new Date();
|
||||
|
||||
var len = stations.stations.length;

//===== Meteorological data scraping - careful!

/**
 * Steps through the station list one station at a time.
 *
 * Each call logs a banner for the station at `currentStation`, waits for the
 * queued download promises of that station to settle and then re-enters
 * itself with the next index. The walk ends once the index reaches `len`.
 *
 * Nothing is queued at the moment. The planned work is:
 *   - Monthly files: for m in 0..11 queue
 *       meteo.getInconsistent(stations.stations[currentStation], m, 2015)
 *       meteo.getMonth(stations.stations[currentStation], m)
 *   - TODO refactor yearly requests (1982 up to the previous year) to the
 *     same queued format.
 *   - TODO refactor station requests into promises format.
 *   - Current (realtime) file per station.
 *
 * @param {number} currentStation Index into `stations.stations`.
 */
(function scrapeMeteo(currentStation) {
  var isFirstCall = (currentStation === 0);
  var pending = [];
  var stationName;

  // Every call after the first one means the previous station has finished.
  if (!isFirstCall) {
    console.log(chalk.cyan('=== Finished.\n'));
  }

  // Past the end of the list: nothing left to do.
  // (For a short test run, stop at `currentStation > 1` instead.)
  if (currentStation >= len) {
    return;
  }

  stationName = stations.stations[currentStation];
  console.log(chalk.cyan('\n=== (' + currentStation + ') Starting ' + stationName));

  // Wait for all queued work to resolve, then recurse into the next station.
  Promise.all(pending).then(function() {
    scrapeMeteo(currentStation + 1);
  });
})(0);
||||
|
||||
//===== Station data scraping - shouldn't need to change often.
|
||||
// stations.downloadAllMetadata();
|
||||
// stations.parseAllMetadata();
|
@ -0,0 +1,48 @@ |
||||
'use strict'; |
||||
|
||||
(function() {
  var stations = require('./server/stations');
  var meteo = require('./server/meteo');
  var assemble = require('./server/assemble');
  var IO = require('./server/io');
  var chalk = require('chalk');

  /**
   * Runs the processing chain for one station.
   *
   * Stops with a "Finished." message once `index` has run past the end of
   * `stations.ids`. Otherwise a promise chain is started; any rejection in
   * it is handed to `IO.error`.
   *
   * @param {number} index Position in `stations.ids`.
   */
  function next(index) {
    var stationCount = stations.ids.length;
    var stationId;
    var chain;

    if (index >= stationCount) {
      console.log(chalk.green('Finished.'));
      return;
    }

    console.log('=== Starting next promise chain.');

    // NOTE: currently pinned to one station; the per-index lookup would be
    // stations.ids[index].
    stationId = 46026;

    chain = Promise.resolve();

    //===== Download (disabled)
    // chain = chain.then(stations.getMetadata.bind(null, stationId));
    // chain = chain.then(meteo.getAllYears.bind(null, stationId, 1982, 2014));
    // chain = chain.then(meteo.getAllMonths.bind(null, stationId, 2015));
    // chain = chain.then(meteo.getAllNewest.bind(null, stationId, 2015));

    //===== Parse (disabled)
    // chain = chain.then(stations.parseStation.bind(null, stationId));
    // chain = chain.then(meteo.parseAllMonths.bind(null, 46026, 2015));
    // chain = chain.then(meteo.parseAllYears.bind(null, stationId, 1982, 2014));

    //===== Assemble
    chain = chain
      .then(function() {
        return assemble.read(stationId, 2015);
      })
      .then(assemble.getAverages);

    //===== Flow control
    chain = chain.then(function() {
      console.log('=== Chain complete.\n');
    });

    // Advancing to the following station is disabled for now:
    // chain = chain.then(next.bind(null, index + 1));

    chain.catch(IO.error);
  }

  // TODO: strip rows belonging to earlier years from each file; for example
  // 46026-2015 should not contain data from 2014.

  next(0);
})();
@ -0,0 +1,166 @@ |
||||
'use strict'; |
||||
|
||||
var IO = require('./io'); |
||||
var meteo = require('./meteo'); |
||||
|
||||
// [
|
||||
// {
|
||||
// id: str
|
||||
// name: str
|
||||
// lat: str
|
||||
// lon: str
|
||||
|
||||
// avg1982: {
|
||||
// d: int[365] || null,
|
||||
// w: int[52] || null,
|
||||
// m: int[12] || null,
|
||||
// y: int || null
|
||||
// },
|
||||
|
||||
// avg1983: ...
|
||||
// },
|
||||
//
|
||||
// {
|
||||
// id: str
|
||||
// ...
|
||||
// }
|
||||
// ]
|
||||
|
||||
|
||||
module.exports = { |
||||
/** |
||||
* |
||||
*/ |
||||
read: function(station, year) { |
||||
return IO.read(meteo.dirs.json + station + '-' + year + '.json') |
||||
.then(module.exports.parse); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
parse: function(str) { |
||||
var json = {}; |
||||
|
||||
try { |
||||
json = JSON.parse(str); |
||||
} catch(e) { |
||||
IO.error(e); |
||||
} |
||||
|
||||
return json; |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getStation: function() { |
||||
|
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getYearlyAverage: function(arr, col) { |
||||
var sum = 0; |
||||
var count = 0; |
||||
|
||||
console.log('Yearly average for column ' + col + '.'); |
||||
|
||||
arr.forEach(function(row) { |
||||
sum += parseInt(row[col]); |
||||
count++; |
||||
}); |
||||
|
||||
var avg = Math.round((sum / count) * 10) / 10 || 0; |
||||
return avg; |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getMonthlyAverages: function(arr, col) { |
||||
var sum, count; |
||||
var months = []; |
||||
var averages = []; |
||||
|
||||
console.log('Monthly averages for column ' + col + '.'); |
||||
|
||||
for (var i = 0; i < 12; i++) { |
||||
months[i] = []; |
||||
} |
||||
|
||||
// Assemble all the values for each month.
|
||||
arr.forEach(function(row) { |
||||
months[row[1] - 1].push(row[col]); |
||||
}); |
||||
|
||||
// Get the average for each collection of values in each day of the year.
|
||||
months.forEach(function(values, index) { |
||||
sum = 0; |
||||
count = 0; |
||||
|
||||
values.map(function(val) { |
||||
sum += parseInt(val); |
||||
count++; |
||||
}); |
||||
|
||||
averages[index] = Math.round((sum / count) * 10) / 10 || 0; |
||||
}); |
||||
|
||||
return averages; |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getDailyAverages: function(arr, col) { |
||||
var sum, count, a, b, doy; |
||||
var days = []; |
||||
var averages = []; |
||||
var dayms = 1000 * 60 * 60 * 24; |
||||
|
||||
console.log('Daily averages for column ' + col + '.'); |
||||
|
||||
for (var i = 0; i <= 365; i++) { |
||||
days[i] = []; |
||||
} |
||||
|
||||
// Assemble all the values for each day of the year.
|
||||
arr.forEach(function(row) { |
||||
a = new Date(row[0], row[1] - 1, row[2]); |
||||
b = new Date(row[0], 0, 1); |
||||
doy = Math.ceil((a - b) / dayms); |
||||
|
||||
days[doy].push(row[col]); |
||||
}); |
||||
|
||||
// Get the average for each collection of values in each day of the year.
|
||||
days.forEach(function(values, index) { |
||||
sum = 0; |
||||
count = 0; |
||||
|
||||
values.map(function(val) { |
||||
sum += parseInt(val); |
||||
count++; |
||||
}); |
||||
|
||||
averages[index] = Math.round((sum / count) * 10) / 10 || 0; |
||||
}); |
||||
|
||||
return averages; |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getAverages: function(arr) { |
||||
var columnToAverage = 14; |
||||
|
||||
// module.exports.getDailyAverages(arr, columnToAverage);
|
||||
// module.exports.getMonthlyAverages(arr, columnToAverage);
|
||||
module.exports.getYearlyAverage(arr, columnToAverage); |
||||
|
||||
return null; |
||||
} |
||||
}; |
@ -0,0 +1,44 @@ |
||||
var fs = require('fs'); |
||||
var chalk = require('chalk'); |
||||
|
||||
/** |
||||
* File IO, error reporting. |
||||
*/ |
||||
module.exports = { |
||||
/** |
||||
* |
||||
*/ |
||||
read: function(file, aaa, bbb) { |
||||
return new Promise(function(resolve) { |
||||
fs.readFile(file, 'utf8', function(err, str) { |
||||
console.log('Read ' + file); |
||||
module.exports.error(err); |
||||
resolve(str || ''); |
||||
}); |
||||
}); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
write: function(file, str) { |
||||
return new Promise(function(resolve) { |
||||
if (str) { |
||||
fs.writeFile(file, str, module.exports.error, resolve); |
||||
console.log('Write ' + file); |
||||
} |
||||
else { |
||||
resolve(); |
||||
} |
||||
}); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
error: function(e) { |
||||
if (e !== null) { |
||||
console.log(chalk.yellow(e)); |
||||
} |
||||
} |
||||
}; |
@ -0,0 +1,165 @@ |
||||
'use strict' |
||||
|
||||
var downloader = require('./downloader.js'); |
||||
var IO = require('./io.js'); |
||||
var NOAA = require('./noaa.js'); |
||||
|
||||
module.exports = { |
||||
//=========================
|
||||
// Read-only vars
|
||||
//=========================
|
||||
dirs: { |
||||
txt: 'data/meteo/txt/', |
||||
json: 'data/meteo/json/' |
||||
}, |
||||
|
||||
months: [null, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], |
||||
|
||||
//==========================
|
||||
// Downloads
|
||||
//==========================
|
||||
/** |
||||
* |
||||
*/ |
||||
getYear: function(station, year) { |
||||
var path = module.exports.dirs.txt + station + '/'; |
||||
var filename = station + 'h' + year + '.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/view_text_file.php?'+ |
||||
'filename=' + filename + '.gz&dir=data/historical/stdmet/'; |
||||
|
||||
downloader.mkdir(path); |
||||
return downloader.download(url, path + filename); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getMonth: function(station, month, year) { |
||||
var path = module.exports.dirs.txt + station + '/'; |
||||
var filename = station + month.toString(16) + year + '.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/view_text_file.php?' + |
||||
'filename=' + filename + '.gz&dir=data/stdmet/' + module.exports.months[month] + '/'; |
||||
|
||||
downloader.mkdir(path); |
||||
return downloader.download(url, path + filename); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getNewest: function(station, month, year) { |
||||
var path = module.exports.dirs.txt + station + '/'; |
||||
var filename = station + month.toString(16) + year + '-newest.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/data/stdmet/' + module.exports.months[month] + '/' + station + '.txt'; |
||||
|
||||
downloader.mkdir(path); |
||||
return downloader.download(url, path + filename); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getCurrent: function(station) { |
||||
var filename = station + '.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/data/realtime2/' + filename; |
||||
|
||||
return downloader.download(url, dir + filename); |
||||
}, |
||||
|
||||
//==========================
|
||||
// Transformations
|
||||
//==========================
|
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
parseAllMonths: function(station, year) { |
||||
var arr = []; |
||||
var txtPath = module.exports.dirs.txt + station + '/'; |
||||
var jsonPath = module.exports.dirs.json + '/'; |
||||
|
||||
for (var month = 1; month <= 12; month++) { |
||||
arr.push(IO.read(txtPath + station + month.toString(16) + year + '-newest.txt').then(NOAA.parseTxt)); |
||||
arr.push(IO.read(txtPath + station + month.toString(16) + year + '.txt').then(NOAA.parseTxt)); |
||||
} |
||||
|
||||
return Promise.all(arr) |
||||
.then(NOAA.aggregate) |
||||
.then(NOAA.convert) |
||||
.then(function(str) { |
||||
IO.write(jsonPath + station + '-' + year + '.json', str); |
||||
}); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
parseYear: function(station, year) { |
||||
var txtPath = module.exports.dirs.txt + station + '/'; |
||||
var jsonPath = module.exports.dirs.json + '/'; |
||||
|
||||
return IO.read(txtPath + station + 'h' + year + '.txt') |
||||
.then(NOAA.parseTxt) |
||||
.then(NOAA.convert) |
||||
.then(function(str) { |
||||
IO.write(jsonPath + station + '-' + year + '.json', str); |
||||
}); |
||||
}, |
||||
|
||||
//========================================
|
||||
// Promise Collections used in loops
|
||||
//========================================
|
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getAllYears: function(station, startYear, endYear) { |
||||
var arr = []; |
||||
for (var year = startYear; year <= endYear; year++) { |
||||
arr.push(module.exports.getYear(station, year)); |
||||
} |
||||
|
||||
return Promise.all(arr); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getAllMonths: function(station, year) { |
||||
var arr = []; |
||||
for (var month = 1; month <= 12; month++) { |
||||
arr.push(module.exports.getMonth(station, month, year)); |
||||
} |
||||
|
||||
return Promise.all(arr); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getAllNewest: function(station, year) { |
||||
var arr = []; |
||||
for (var month = 1; month <= 12; month++) { |
||||
arr.push(module.exports.getNewest(station, month, year)); |
||||
} |
||||
|
||||
return Promise.all(arr); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
parseAllYears: function(station, startYear, endYear) { |
||||
var arr = []; |
||||
|
||||
for (var year = startYear; year <= endYear; year++) { |
||||
arr.push(module.exports.parseYear(station, year)); |
||||
} |
||||
|
||||
return Promise.all(arr); |
||||
}, |
||||
}; |
@ -1,68 +0,0 @@ |
||||
'use strict' |
||||
|
||||
var downloader = require('./downloader.js'); |
||||
var dir = 'data/meteorological/' |
||||
var Promise = require('es6-promise').Promise; |
||||
|
||||
module.exports = { |
||||
months: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getYear: function(buoy, yyyy) { |
||||
var filename = buoy + 'h' + yyyy + '.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/view_text_file.php?'+ |
||||
'filename=' + filename + '.gz&dir=data/historical/stdmet/'; |
||||
|
||||
return downloader.download(url, dir + filename); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getMonth: function(buoy, m, yyyy) { |
||||
var month = m + 1; |
||||
month = (month == 10 ? 'a' : month); |
||||
month = (month == 11 ? 'b' : month); |
||||
month = (month == 12 ? 'c' : month); |
||||
|
||||
var filename = buoy + month.toString() + yyyy + '.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/view_text_file.php?' + |
||||
'filename=' + filename + '.gz&dir=data/stdmet/' + this.months[m] + '/'; |
||||
|
||||
var path = dir + buoy + '/'; |
||||
downloader.mkdir(path); |
||||
return downloader.download(url, path + filename); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getCurrent: function(buoy) { |
||||
var filename = buoy + '.txt'; |
||||
|
||||
var url = 'http://www.ndbc.noaa.gov/data/realtime2/' + filename; |
||||
|
||||
return downloader.download(url, dir + filename); |
||||
}, |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
getInconsistent: function(buoy, m) { |
||||
var url = 'http://www.ndbc.noaa.gov/data/stdmet/' + this.months[m] + '/' + buoy + '.txt'; |
||||
|
||||
var month = m + 1; |
||||
month = (month == 10 ? 'a' : month); |
||||
month = (month == 11 ? 'b' : month); |
||||
month = (month == 12 ? 'c' : month); |
||||
|
||||
var path = dir + buoy + '/'; |
||||
downloader.mkdir(path); |
||||
|
||||
return downloader.download(url, path + buoy.toString() + month + '2015-newest.txt'); |
||||
} |
||||
}; |
@ -0,0 +1,88 @@ |
||||
/** |
||||
* NOAA-specific filtering. |
||||
*/ |
||||
module.exports = { |
||||
/** |
||||
* |
||||
*/ |
||||
splitLine: function(str) { |
||||
var arr = str.split(/\s+/); |
||||
arr.filter(function(val) { return (val.length > 0); }) |
||||
return arr; |
||||
}, |
||||
|
||||
/** |
||||
* Receives a stream from a file read event. |
||||
*/ |
||||
parseTxt: function(str) { |
||||
console.log('Parsing NOAA space-delimited columnar data into JSON.'); |
||||
|
||||
var arr = []; |
||||
var cols = null; |
||||
var lines = str.split('\n'); |
||||
var len = lines.length; |
||||
|
||||
if (len > 8) { |
||||
for (var i = 0; i < len; i++) { |
||||
cols = module.exports.splitLine(lines[i]); |
||||
cols.length > 0 ? arr.push(cols) : null; |
||||
} |
||||
} |
||||
|
||||
return arr; |
||||
}, |
||||
|
||||
/** |
||||
* After all files have been parsed, Promises.all passes them all as an array. |
||||
* This function does filtering on them and finalizes a JSON string. |
||||
*/ |
||||
convert: function(arr) { |
||||
console.log('Converting aggregated month files to JSON.'); |
||||
|
||||
// Sort.
|
||||
var sorted = arr.sort(function(a, b) { |
||||
var dateA = parseInt([a[0], a[1], a[2], a[3], ('00' + a[4]).substr(-2)].join('')) || 0; |
||||
var dateB = parseInt([b[0], b[1], b[2], b[3], ('00' + b[4]).substr(-2)].join('')) || 0; |
||||
|
||||
return dateA - dateB; |
||||
}); |
||||
|
||||
// Filter for multiple headings/units rows.
|
||||
var result = sorted.filter(function(row) { |
||||
return !(row[0] === '#YY' || row[0] === '#yr' || row.length === 1); |
||||
}); |
||||
|
||||
// Convert to JSON that can later be read easily.
|
||||
var str = null; |
||||
if (result.length > 0) { |
||||
str = JSON.stringify(result) |
||||
str = str.replace(/\],\[/g, '],\n['); |
||||
} |
||||
|
||||
return str; |
||||
}, |
||||
|
||||
/** |
||||
* Used to aggregate month files after they have been split into a lines array. |
||||
* Each line has been split into individual elements. |
||||
* The array passed to this function is therefore an array of two dimensional arrays. |
||||
* |
||||
* This function adds non-empty lines to a common result set. |
||||
*/ |
||||
aggregate: function(arr) { |
||||
console.log('Aggregating month files for the year.'); |
||||
|
||||
var tmp = []; |
||||
|
||||
arr.forEach(function(rows) { |
||||
if (rows.length === 0) { |
||||
return; |
||||
} |
||||
|
||||
tmp = tmp.concat(rows); |
||||
}); |
||||
|
||||
return tmp; |
||||
} |
||||
}; |
||||
|
Loading…
Reference in new issue