From 8b95cb4f106b906a362babc2c21b18d57c7a4748 Mon Sep 17 00:00:00 2001 From: Paul J R Date: Mon, 21 Jan 2013 07:04:28 +1100 Subject: [PATCH] added checks against file sizes and so forth --- .gitignore | 1 + lib/cache.js | 86 ++++++++++++++++++++++++++++++++++++++++++++++++-------- lib/router.js | 26 +++++++++++------ repos.conf | 2 +- 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 14d86ad..3adcbce 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /cache +/bug diff --git a/lib/cache.js b/lib/cache.js index 3e9ec77..6b6327d 100644 --- a/lib/cache.js +++ b/lib/cache.js @@ -1,6 +1,7 @@ var fs = require("fs"); var http = require("http"); var url = require("url"); +var path = require("path"); function maintainCache() { // TODO i should check that im already running here and exit if i am @@ -35,8 +36,8 @@ function upstreamRequest(unify) { var headReq = url.parse(xpath); headReq["method"] = "HEAD"; - getup = http.request(xpath, function(res) { - res.setEncoding("utf8"); + getup = http.request(headReq, function(res) { + //res.setEncoding("utf8"); if(!endData) { console.log("status code is ", typeof res.statusCode); @@ -75,9 +76,14 @@ function upstreamRequest(unify) { endData = true; } else { // this is where it gets ugly + var filesize = res.headers["content-length"]; console.log("do ugly write: ", unify); //unify.b.write(data); - getAndService(unify, xpath); + var metafilename = unify.fullPathDirName + "/.meta."+ path.basename(unify.requestFor) +".filesize"; + var metafile = fs.createWriteStream(metafilename); + metafile.write(filesize); + metafile.end(); + getAndService(unify, xpath, filesize); } break; @@ -96,7 +102,11 @@ function upstreamRequest(unify) { exports.upstreamRequest = upstreamRequest; -function getAndService(unify, xpath) { +function getAndService(unify, xpath, filesize) { + + console.log("calling in here with filesize, ", filesize) + unify.b.writeHead(200, {'Content-Length' : filesize}); + if(typeof global.repoproxy.downloads[unify.fullFilePath] != "undefined" && global.repoproxy.downloads[unify.fullFilePath] == 1) { @@ -105,12 +115,13 @@ function getAndService(unify, xpath) { unify.b.end(); } else { global.repoproxy.downloads[unify.fullFilePath] = 1; + http.get(xpath, function(res) { var file = fs.createWriteStream(unify.fullFilePath); - console.log("res: ", res); + //console.log("res: ", res); //res.setEncoding("utf8"); @@ -138,21 +149,70 @@ function getAndService(unify, xpath) { function serviceFile(unify) { // for now, ignore range. + // however we need to check if a metadata file exists describing the filesize, check if its all correct + // and if not, erase the file (and metafile) and forward the request back to upstream request - // file should already exist, so we just poop it out - var inp = fs.createReadStream(unify.fullFilePath); - //inp.setEncoding("utf8"); - inp.on("data", function(data) { - unify.b.write(data); - }); - inp.on("end", function(closed) { - unify.b.end(); + checkFile(unify, function() { + + // file should already exist, so we just poop it out + var inp = fs.createReadStream(unify.fullFilePath); + //inp.setEncoding("utf8"); + inp.on("data", function(data) { + unify.b.write(data); + }); + + inp.on("end", function(closed) { + unify.b.end(); + }); }); } exports.serviceFile = serviceFile; + +function checkFile(unify, callback) { + // in here we do the metadata checks + var metafilename = unify.fullPathDirName + "/.meta."+ path.basename(unify.requestFor) +".filesize"; + + fs.exists(metafilename, function(existence) { + if(existence) { + var fsizef = fs.createReadStream(metafilename); + var fsize = ""; + fsizef.on("data", function(data) { + fsize += data; + }); + + fsizef.on("end", function() { + fs.stat(unify.fullFilePath, function(err, stats) { + var rfsize = stats["size"]; + if(rfsize != fsize.trim()) { + // remove the file and start again + console.log("reported filesizes dont match, '%s', '%s', removing file and starting again", rfsize, stats["size"]); + try { + fs.unlink(metafilename, function(){ + fs.unlink(unify.fullFilePath, function(){ + upstreamRequest(unify); + }) + }); + } catch(e) { + upstreamRequest(unify); + } + } else { + // we're good + unify.b.writeHead(200, {"Content-Length" : unify.fileSize}) + callback(); + } + }); + }); + } else { + console.log("file, '%s' exists but has no filesize meta data, assuming it was put here manually and servicing", unify.fullFilePath); + unify.b.writeHead(200, {"Content-Length" : unify.fileSize}) + callback(); + } + }); +} + function makeCacheDir(path) { console.log("attempting to create... '%s' as '%s'", path.fullPathDirName, path.subPathDirName); diff --git a/lib/router.js b/lib/router.js index 626eb5d..fcf697c 100644 --- a/lib/router.js +++ b/lib/router.js @@ -5,7 +5,9 @@ var path = require("path"); exports.routeRequest = function(req, res) { // first, unify the request + console.log("request: ", req.url); var thisQuery = unifyRequest(req, res, function(unified) { + console.log("unified request is ", unified); if(unified.requestFor == "/favicon.ico") { unified.b.writeHead(404, {"Content-Type": "text/plain"}); unified.b.write("404 Not Found\n"); @@ -20,14 +22,20 @@ exports.routeRequest = function(req, res) { console.log("ERROR: something went majorly wrong with something, ", unified); } } else { - // it doesnt exist yet, so we send it to the cache service - console.log("file doesnt exist, upstream we go: ", unified); - cache.upstreamRequest(unified); + // it doesnt exist yet, so we send it to the cache service if it matches an upstream service + if(typeof global.repoproxy.repo[unified.topPath] != "undefined") { + console.log("file doesnt exist, upstream we go: ", unified); + cache.upstreamRequest(unified); + } else { + unified.b.writeHead(404, {"Content-Type": "text/plain"}); + unified.b.write("404 Not Found\n"); + unified.b.end(); + } } }); } -function unifyRequest(req, res, callback, testing) { +function unifyRequest(req, res, callback) { var unified = new Object(); var originalurl = url.parse(req.url); @@ -76,24 +84,24 @@ function unifyRequest(req, res, callback, testing) { //send a 302 and call it a day res.writeHead("302", { 'Location': unified.originalReq+"/" }); res.end(); - - // TODO: remove this after testing - if(testing) callback(null); - return 302; } if(stats.isDirectory()) { unified.isDirectory = true; unified.isFile = false; + unified.fileSize = null; } else if(stats.isFile()) { unified.isDirectory = false; - unified.isFile = true; + unified.isFile = true; + unified.fileSize = stats["size"]; } else { unified.isDirectory = false; unified.isFile = false; + unified.fileSize = null; } } else { unified.exists = false; + unified.fileSize = null; } callback(unified); diff --git a/repos.conf b/repos.conf index cb13e2f..68fa3f7 100644 --- a/repos.conf +++ b/repos.conf @@ -17,4 +17,4 @@ ploop:asdf # updateinterval is how often repo meta data is refreshed (days) # packageage is how long a package will go unread before it gets deleted (days) repo:fedora:yum:http://ftp.iinet.net.au/pub/fedora/linux/:7:120 -repo:fedora2:yum:http://ftp.iinet.net.au/pub/fedora/linux/:7:120 \ No newline at end of file +repo:ubuntu:apt:http://ftp.iinet.net.au/pub/ubuntu/:7:120 \ No newline at end of file -- 1.7.0.4