/************************************************************************
A quick hack to stuff the site contents into an sqlite3 db for FTS
indexing purposes. Pass it a list of content filenames, relative to
the top of the web site's docroot. This script will slurp in the
contents of those files and index them as belonging to the
directory part of that file (it assumes a 1:1 mapping of content files
to directory URIs).
Note that it should be passed the dir-content.html of each directory,
not the index.html, as the latter contains site-level stuff which is
repeated across all pages (e.g. the site menu). Also, don't pass it
the dir-content.s2.html because those files don't contain all the
generated content, and may contain s2 content we don't want indexed.
************************************************************************/
/* Script-local alias for the s2 object. */
const s2 = s2;
/* Non-flag CLI arguments (the content files to index), or 0 if there
   are none. */
const flist = (s2.ARGV ? s2.ARGV.nonFlags : 0) ||| 0;
/* CLI flags, falling back to an empty prototype-less object. */
const cliFlags = (s2.ARGV ? s2.ARGV.flags : 0) ||| {prototype:null};

/* Refuse to run without at least one input file. */
if(!(flist && flist.#)){
    throw "Expecting list of files to process.";
}
/* Load the sqlite3 module, but only if s2 does not already provide
   one as s2.sqlite3. */
if(!s2.sqlite3){
/* Use s2.require if it is set. Otherwise import require.s2 from the
   require.d directory of the s2 home directory, which is taken from
   the 's2.home' CLI flag or, failing that, the S2_HOME environment
   variable. The affirm fails if neither of those yields a string.
   NOTE(review): the scope block's result (presumably the value of
   the import call) becomes R - confirm against the s2 docs. */
const R = s2.require ||| scope {
affirm typeinfo(isstring const home = cliFlags['s2.home'] ||| s2.getenv('S2_HOME'));
import(false, home + '/require.d/require.s2');
};
affirm typeinfo(iscontainer R);
/* R is called with a list of module names and entry 0 of its result
   is installed as s2.sqlite3. */
s2.sqlite3 = R(['s2mod!sqlite3']).0;
}
/**
   Application state and helpers for the indexer: the sqlite3 module,
   the db configuration, a lazily-opened db handle, the CLI flags and
   file list, and the routine which inserts a single file into the
   page table.
*/
const app = {
    /* The sqlite3 module; db() uses it as a constructor. */
    sqlite3: s2.sqlite3,
    config:{
        /* Db file name: the 'db-file' CLI flag if set, else
           fts-pages.sqlite3 appended to __FILEDIR. */
        dbFile: cliFlags['db-file'] ||| __FILEDIR+'fts-pages.sqlite3',
        /* Passed as-is to the sqlite3 constructor (presumably
           "writable" - confirm against the sqlite3 module docs). */
        dbOpenFlags: 'w'
    },
    /**
       Returns the db handle, opening it on the first call. The handle
       is cached in the `$` property of this function itself, so all
       later calls return that same handle.
    */
    db: proc x(){
        x.$ && return x.$;
        affirm typeinfo(isobject this.sqlite3);
        return x.$ = new this.sqlite3(this.config.dbFile,
                                      this.config.dbOpenFlags);
    },
    /* The guarded flags object rather than s2.ARGV.flags directly, so
       that flag lookups are safe even if no flags object exists. */
    flags: cliFlags,
    flist: flist,
    /**
       Inserts file f into the FTS database. u may be the URI for the
       file, and defaults to the directory part of f.

       The row is made up of f's mtime (via S), the URI, and f's
       contents (via B). The statement is finalized after it is
       executed.
    */
    insert:proc(f,u){
        if(!u){
            /* Derive the URI: drop the last path component of f and
               root the remainder at '/'. */
            const a = f.split('/'); a.pop();
            u = '/'+a.join('/');
        }
        this.db().prepare(I).exec(S(f).mtime, u, B(f)).finalize();
    } using {
        /* I, B and S are the symbols used by the body of insert(). */
        I: "insert into page(mtime, uri, content) values(?,?,?)",
        /** ^^^ noting that the mtime we use here always uses
            dir-content.html instead of dir-content.s2.html (which
            would arguably be more correct but those files aren't any
            good for indexing). */
        B: s2.Buffer.readFile,
        S: s2.fs.stat
        /**
           Potential TODO: check for the existence of a modified form
           of filenames (s/\.html/.s2.html/) and if it exists then use
           it for timestamping, rather than f. Alternately, allow the
           passing-in of the timestamp from the CLI, as that would
           allow us to feed in the timestamps fossil knows (the time
           of the last commit the file was in). (Fossil's touch command,
           added since that was written, obviates that need.)
        */
    }
};
/*affirm typeinfo(isstring const infile = app.flags['i']);
affirm typeinfo(isstring const uri = (app.flags['uri'] ||| scope{
const a = infile.split('/');
a.pop();
'/'+a.join('/');
}));
*/
/**
   Logging helper: forwards all of its arguments to print when the
   'v' flag is set, and does nothing otherwise.
*/
const verbose = app.flags.v
    ? proc(){ return print.apply(this, argv) } using(print)
    : proc(){};

/* Insert every listed file from within a single transaction() call. */
app.db().transaction(function() using(app,verbose){
    foreach(@app.flist=>entry){
        /* Strip a leading "./" from the file name. */
        const infile = (0===entry.indexOf('./')) ? entry.substr(2) : entry;
        /* The URI is the directory part of the file name, rooted
           at "/". */
        const parts = infile.split('/');
        parts.pop();
        const uri = '/' + parts.join('/');
        verbose('infile:', infile, '\nuri:', uri);
        app.insert(infile, uri);
    }
});

/* Optionally vacuum the db once everything has been inserted. */
if(app.flags['vacuum']){
    app.db().exec("vacuum");
}
verbose("Done!");