added a logger, started working on the maintenance timer routines
[nodejs-repoproxy.git] / lib / cache.js
1 var fs = require("fs");
2 var http = require("http");
3 var url = require("url");
4 var path = require("path");
5 var crypto = require("crypto");
6
/**
 * Answer a request that the cache cannot satisfy by asking the upstream
 * repository about it.
 *
 * The upstream URL is built from global.repoproxy.repo[unify.topPath].url plus
 * unify.subPath. If the target file is flagged as currently downloading in
 * global.repoproxy.downloads, the request is handed to inlineService instead.
 * Otherwise a HEAD request is sent upstream and its status decides the reply:
 *   - 301/302: if the redirect target ends the same way as "<url>/" the path
 *     is assumed to be a directory; the cache directory is created and the
 *     client is redirected to "<originalReq>/". Any other redirect is a 404.
 *   - 404: passed on to the client.
 *   - 200: the cache directory is created; directory requests are listed via
 *     serviceDirectory, file requests record the upstream size in a
 *     ".meta.<name>.filesize" file and are fetched via getAndService.
 *   - anything else, or a failed upstream request: 502.
 *
 * @param {Object} unify per-request state. Reads requestFor, topPath, subPath,
 *        fullFilePath, fullPathDirName, isDirectoryRequest, originalReq and
 *        the client response object `b`.
 */
function upstreamRequest(unify) {
    // first do a head request
    console.log("upsteram as ", unify.requestFor);

    // Set once the client response has been answered or handed to a helper.
    var answered = false;
    var xpath = "";

    function replyText(status, text) {
        if (answered) {
            return;
        }
        answered = true;
        unify.b.writeHead(status, {"Content-Type": "text/plain"});
        unify.b.write(text);
        unify.b.end();
    }

    if (unify.topPath != null && unify.topPath != "") {
        var repo = global.repoproxy.repo[unify.topPath];
        if (typeof repo != "undefined") {
            xpath = repo.url + unify.subPath;
        }
    }

    // not doing this properly yet...
    if (global.repoproxy.downloads[unify.fullFilePath] == 1) {
        console.log("request for file thats being downloaded already, doing inline request");
        inlineService(unify);
        return;
    }

    // Without a configured upstream there is nothing to ask; an empty URL
    // would otherwise turn into a request against localhost.
    if (xpath == "") {
        console.log("no upstream known for '%s', sending 404", unify.topPath);
        replyText(404, "404 Not Found\n");
        return;
    }

    console.log("sending off to '%s'", xpath);

    var headReq = url.parse(xpath);
    headReq["method"] = "HEAD";

    var getup = http.request(headReq, function(res) {
        // A HEAD reply has no body, but the stream still has to be drained.
        res.resume();

        if (answered) {
            return;
        }

        console.log("status code is ", typeof res.statusCode);
        switch (res.statusCode) {
        // TODO: this 301 directory redirect thing needs to work better
        case 301:
        case 302:
            // Compare only the last four characters of both URLs.
            var location = res.headers.location || "";
            var loc = location.substr(location.length - 4);
            var against_t = xpath + "/";
            var against = against_t.substr(against_t.length - 4);

            if (loc == against) {
                console.log("got a redirect, upstream for loc => loc/ assuming its a directory");
                makeCacheDir(unify);
                answered = true;
                unify.b.writeHead(302, { "Location": unify.originalReq + "/" });
                unify.b.end();
            } else {
                console.log("checked '%s' against '%s', was false, sending 404", loc, against);
                replyText(404, "404 Not Found\n");
            }
            break;

        case 404:
            replyText(404, "404 Not Found\n");
            break;

        case 200:
            makeCacheDir(unify);
            if (unify.isDirectoryRequest) {
                answered = true;
                serviceDirectory(unify);
                break;
            }

            // this is where it gets ugly
            var filesize = res.headers["content-length"];
            if (typeof filesize == "undefined") {
                // The size is needed for both the metadata file and the
                // Content-Length header sent to the client.
                console.log("upstream gave no content-length for '%s'", xpath);
                replyText(502, "502 Bad Gateway\n");
                break;
            }

            console.log("do ugly write: ", unify);
            var metafilename = unify.fullPathDirName + "/.meta." + path.basename(unify.requestFor) + ".filesize";
            var metafile = fs.createWriteStream(metafilename);
            metafile.on("error", function(err) {
                console.log("could not write '%s': ", metafilename, err);
            });
            metafile.write(filesize);
            metafile.end();

            answered = true;
            getAndService(unify, xpath, filesize);
            break;

        default:
            // Any other status used to leave the client waiting forever.
            console.log(".... data");
            replyText(502, "502 Bad Gateway\n");
        }
    });

    getup.on("error", function(err) {
        console.log("upstream request for '%s' failed: ", xpath, err);
        replyText(502, "502 Bad Gateway\n");
    });

    getup.end();
}
97
98 exports.upstreamRequest = upstreamRequest;
99
/**
 * Download a file from upstream, writing each chunk both to the cache file
 * and to the waiting client.
 *
 * While the transfer runs, global.repoproxy.downloads[unify.fullFilePath] is
 * set to 1; it is reset to 0 when the transfer ends, whether it succeeded or
 * failed, so later requests are not treated as "download in progress" forever.
 *
 * @param {Object} unify    per-request state; reads fullFilePath and the
 *                          client response object `b`.
 * @param {string} xpath    upstream URL to fetch.
 * @param {string} filesize value sent to the client as Content-Length.
 */
function getAndService(unify, xpath, filesize) {
    console.log("calling in here with filesize, ", filesize);

    var target = unify.fullFilePath;
    var file = null;
    var headWritten = false;
    var finished = false;

    // Release the download flag and close both outputs exactly once.
    function finish() {
        if (finished) {
            return;
        }
        finished = true;
        global.repoproxy.downloads[target] = 0;
        if (file !== null) {
            file.end();
        }
        unify.b.end();
    }

    function fail(what, err) {
        console.log(what, err);
        if (!headWritten) {
            // Nothing was promised to the client yet, so report the failure.
            headWritten = true;
            unify.b.writeHead(502, {"Content-Type": "text/plain"});
            unify.b.write("502 Bad Gateway\n");
        }
        finish();
    }

    global.repoproxy.downloads[target] = 1;

    var req;
    try {
        req = http.get(xpath, function(res) {
            headWritten = true;
            unify.b.writeHead(200, {'Content-Length' : filesize});

            file = fs.createWriteStream(target);
            file.on("error", function(err) {
                // Keep serving the client even if the cache copy cannot be written.
                console.log("could not write cache file '%s': ", target, err);
                file = null;
            });

            res.on("data", function(data) {
                if (file !== null) {
                    file.write(data);
                }
                unify.b.write(data);
            });

            res.on("end", function() {
                console.log("end...");
                finish();
            });

            res.on("aborted", function() {
                fail("res was aborted... ", xpath);
            });

            res.on("error", function(err) {
                fail("res threw error... ", err);
            });
        });
    } catch (e) {
        // http.get throws synchronously on an unusable URL.
        fail("could not start download... ", e);
        return;
    }

    req.on("error", function(err) {
        fail("upstream download failed... ", err);
    });
}
135
/**
 * Serve a file that another request is still downloading into the cache.
 *
 * The expected size is read from the ".meta.<name>.filesize" file next to the
 * target and sent as Content-Length. The cache file is then polled: whenever
 * it has grown, the new bytes are forwarded to the client. Polling runs once
 * per second (first poll after 100ms) until the expected number of bytes has
 * been sent. If more than 60 consecutive polls see no growth, or the file
 * cannot be stat'ed, the response is ended early.
 *
 * @param {Object} unify per-request state; reads fullPathDirName, requestFor,
 *                       fullFilePath and the client response object `b`.
 */
function inlineService(unify) {
    var metafilename = unify.fullPathDirName + "/.meta." + path.basename(unify.requestFor) + ".filesize";

    fs.readFile(metafilename, "utf8", function(metaErr, contents) {
        if (metaErr) {
            console.log("inline service - cannot read '%s': ", metafilename, metaErr);
            unify.b.writeHead(500, {"Content-Type": "text/plain"});
            unify.b.write("500 Internal Server Error\n");
            unify.b.end();
            return;
        }

        var fsize = contents.trim();
        var expected = parseInt(fsize, 10);
        var sentSoFar = 0;
        // Consecutive polls that found nothing new; local to this request.
        var idlePolls = 0;

        unify.b.writeHead(200, {"Content-Length" : fsize });

        // now we go into the file reading loop.
        console.log("start of inline services");

        function pollAgain() {
            setTimeout(sendPieces, 1000);
        }

        function noProgress() {
            idlePolls++;
            // we bombed out somehow
            if (idlePolls > 60) {
                unify.b.end();
            } else {
                pollAgain();
            }
        }

        function sendPieces() {
            fs.stat(unify.fullFilePath, function(err, stats) {
                if (err != null) {
                    // The downloader may not have created the file yet;
                    // count this as a poll without progress.
                    console.log("inline service - we're in a very bad place");
                    noProgress();
                    return;
                }

                if (stats.size <= sentSoFar) {
                    noProgress();
                    return;
                }

                idlePolls = 0;

                // `end` is inclusive, so stop at the last byte known to exist.
                var rs = fs.createReadStream(unify.fullFilePath, {start: sentSoFar, end: stats.size - 1});

                rs.on("data", function(thisdata) {
                    // Count what was really read so no byte is sent twice.
                    sentSoFar += thisdata.length;
                    unify.b.write(thisdata);
                });

                rs.on("error", function(readErr) {
                    console.log("inline service - read failed: ", readErr);
                    unify.b.end();
                });

                rs.on("end", function() {
                    if (sentSoFar < expected) {
                        pollAgain();
                    } else {
                        // we're done!
                        unify.b.end();
                    }
                });
            });
        }

        setTimeout(sendPieces, 100);
    });
}
204
/**
 * Send a cached file to the client.
 *
 * checkFile runs first; its callback is only invoked once checkFile has
 * written the response header, and the file body is streamed from there.
 * Range requests are ignored for now.
 *
 * @param {Object} unify per-request state; reads fullFilePath and the client
 *                       response object `b` (must be a writable stream).
 */
function serviceFile(unify) {
    checkFile(unify, function() {
        // file should already exist, so we just stream it out
        var inp = fs.createReadStream(unify.fullFilePath);

        inp.on("error", function(err) {
            // pipe() does not end the destination on a read error, and an
            // unhandled 'error' event would take the whole process down.
            console.log("could not read cached file '%s': ", unify.fullFilePath, err);
            unify.b.end();
        });

        // pipe() honours backpressure and ends the response when the file ends.
        inp.pipe(unify.b);
    });
}
227
228 exports.serviceFile = serviceFile;
229
230
/**
 * Validate a cached file against its ".meta.<name>.filesize" record before it
 * is served.
 *
 *   - No metadata file: the cached file is assumed to have been placed there
 *     manually; the 200 header is written and `callback` is invoked.
 *   - Metadata matches the size on disk: same as above.
 *   - Size mismatch, or the cached file cannot be stat'ed: the metadata and
 *     the cached file are removed and the request is passed to
 *     upstreamRequest; `callback` is NOT invoked.
 *
 * @param {Object}   unify    per-request state; reads fullPathDirName,
 *                            requestFor, fullFilePath, fileSize and the
 *                            client response object `b`.
 * @param {Function} callback called with no arguments once the 200 header
 *                            has been written.
 */
function checkFile(unify, callback) {
    // in here we do the metadata checks
    var metafilename = unify.fullPathDirName + "/.meta." + path.basename(unify.requestFor) + ".filesize";

    function serve() {
        unify.b.writeHead(200, {"Content-Length" : unify.fileSize});
        callback();
    }

    function refetch() {
        // Unlink failures are deliberately ignored: either file may already
        // be gone, and the upstream fetch writes both again.
        fs.unlink(metafilename, function() {
            fs.unlink(unify.fullFilePath, function() {
                upstreamRequest(unify);
            });
        });
    }

    fs.readFile(metafilename, "utf8", function(metaErr, fsize) {
        if (metaErr) {
            if (metaErr.code != "ENOENT") {
                console.log("could not read '%s': ", metafilename, metaErr);
            }
            console.log("file, '%s' exists but has no filesize meta data, assuming it was put here manually and servicing", unify.fullFilePath);
            serve();
            return;
        }

        fs.stat(unify.fullFilePath, function(err, stats) {
            if (err) {
                console.log("could not stat '%s', removing file and starting again: ", unify.fullFilePath, err);
                refetch();
                return;
            }

            var expected = fsize.trim();
            var rfsize = stats["size"];
            // Loose comparison on purpose: number on disk vs. text from the metafile.
            if (rfsize != expected) {
                console.log("reported filesizes dont match, '%s', '%s', removing file and starting again", rfsize, expected);
                refetch();
                return;
            }

            // we're good
            serve();
        });
    });
}
272
/**
 * Create the cache directory for a request, one path component at a time.
 *
 * Starting from path.topFullPath, each "/"-separated component of
 * path.subPathDirName is appended and created with mkdirSync. Components that
 * already exist are skipped silently; any other failure is logged and the
 * loop carries on, so this function never throws because of a mkdir error.
 *
 * @param {Object} path per-request state (NOT the `path` module); reads
 *                      topFullPath, subPathDirName and, for logging,
 *                      fullPathDirName.
 */
function makeCacheDir(path) {
    console.log("attempting to create... '%s' as '%s'", path.fullPathDirName, path.subPathDirName);

    var startAt = path.topFullPath;
    var nextbits = path.subPathDirName.split("/");
    for (var i = 0; i < nextbits.length; i++) {
        startAt += "/" + nextbits[i];
        console.log("attempt mkdir on '%s'", startAt);
        try {
            fs.mkdirSync(startAt);
        } catch (e) {
            // An existing directory is the normal case; anything else is
            // worth knowing about but stays best-effort.
            if (e.code != "EEXIST") {
                console.log("mkdir on '%s' failed: ", startAt, e);
            }
        }
    }
}
289
// Escape text for safe use in HTML element content and attribute values.
function escapeHtml(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Write an HTML listing of the cached directory unify.fullFilePath to the
 * client and end the response.
 *
 * Entries whose name starts with "." are left out. Directories and regular
 * files are listed with a link; files also show their size. The request path
 * and the entry names are HTML-escaped because they end up in markup.
 * Entries that cannot be stat'ed are logged and skipped.
 *
 * @param {Object} unify per-request state; reads originalReq, fullFilePath
 *                       and the client response object `b`.
 */
function serviceDirectory(unify) {
    var nfiles = 0;
    var res = unify.b;

    res.write("<html><h1>Directory listing for " + escapeHtml(unify.originalReq) + "</h1><hr><pre>");
    if (unify.originalReq != "/") res.write("<a href=\"..\">Parent</a>\n\n");

    fs.readdir(unify.fullFilePath, function(err, files) {
        console.log("doing directory listing on: ", unify.fullFilePath);
        if (err != null) {
            res.write("we have entered bizaro world...\n");
            res.write("</pre>");
            res.end();
            return;
        }

        // TODO: make this work asynchronously...
        for (var i = 0; i < files.length; i++) {
            var name = files[i];

            if (name.match(/^\..*/) != null) {
                console.log("ignoring file, ", name);
                continue;
            }

            // avoiding statSync is too hard for now, will fix later TODO: fix this sync bit
            var stats;
            try {
                stats = fs.statSync(unify.fullFilePath + "/" + name);
            } catch (e) {
                // e.g. a broken symlink or a file removed since readdir
                console.log("could not stat '%s', leaving it out: ", name, e);
                continue;
            }

            var shown = escapeHtml(name);
            if (stats.isDirectory()) {
                res.write("Directory: <a href=\"" + shown + "/\">" + shown + "/</a>\n");
                nfiles++;
            } else if (stats.isFile()) {
                // Pad so the size column lines up for short names.
                var padlength = 80 - (name.length) - stats.size.toString().length;
                var padding = "";
                if (padlength > 0) {
                    padding = new Array(padlength).join(" ");
                }
                res.write("File:      <a href=\"" + shown + "\">" + shown + "</a>" + padding + stats.size + " bytes\n");
                nfiles++;
            }
        }

        if (nfiles == 0) res.write("Empty directory....\n");

        res.write("<hr></pre>");
        res.end();
    });
}
335
/**
 * Move a file or directory out of the cache into
 * "<global.repoproxy.cacheDir>/.cleanup".
 *
 * The destination name is "<current time in ms>.<id>", where the id comes
 * from incrementing global.repoproxy.fileid. The ".cleanup" directory is
 * created if it does not exist yet.
 *
 * @param {string} file_or_dir path to move; one trailing "/" is stripped.
 * @returns {string} the path the entry was moved to.
 * @throws whatever mkdirSync/renameSync throw (other than ".cleanup" already
 *         existing).
 */
function moveToCleanup(file_or_dir) {
    var cleanup = global.repoproxy.cacheDir + "/.cleanup";

    // renameSync cannot create the parent, so make sure it is there.
    try {
        fs.mkdirSync(cleanup);
    } catch (e) {
        if (e.code != "EEXIST") {
            throw e;
        }
    }

    var ctime = new Date().getTime();
    var encoded = (++global.repoproxy.fileid).toString();
    var toloc = cleanup + "/" + ctime.toString() + "." + encoded;

    fs.renameSync(file_or_dir.replace(/\/$/, ""), toloc);
    return toloc;
}
347
// Stub: the body is empty, so calling this currently does nothing.
// NOTE(review): presumably meant to process what moveToCleanup puts into
// ".cleanup" - confirm once the maintenance timer code exists.
348 function cleanupRoutine() {
349         
350 }
351
352
353 exports.serviceDirectory = serviceDirectory;
354 exports.moveToCleanup = moveToCleanup;