wanderinghorse.net website

fts-pages.s2 at tip
Login

fts-pages.s2 at tip

File site-tools/fts-pages.s2 from the latest check-in


/************************************************************************
A quick hack to stuff the site contents into an sqlite3 db for FTS
indexing purposes. Pass it a list of content filenames, relative to
the top of the web site's docroot. This script will slurp in the
contents of those files and index them as belonging to the
directory part of that file (it assumes a 1:1 mapping of content files
to directory URIs).

Note that it should be passed the dir-content.html of each directory,
not the index.html, as the latter contains site-level stuff which is
repeated across all pages (e.g. the site menu). Also, don't pass it
the dir-content.s2.html because those files don't contain all the
generated content, and may contain s2 content we don't want indexed.
************************************************************************/

/* Local alias for the s2 object. */
const s2 = s2;

/* Non-flag CLI arguments (the files to process), or 0 if s2.ARGV or
   its nonFlags member is not set. */
const flist = (s2.ARGV ? s2.ARGV.nonFlags : 0) ||| 0;

/* CLI flags, or an empty prototype-less object if s2.ARGV or its
   flags member is not set. */
const cliFlags = (s2.ARGV ? s2.ARGV.flags : 0) ||| {prototype:null};

/* Bail out unless we were given a non-empty list of input files. */
if(!(flist && flist.#)){
    throw "Expecting list of files to process.";
}

/* Make sure the sqlite3 module is available as s2.sqlite3, loading it
   via require.s2 if it is not already set. */
if(!s2.sqlite3){
    /* Use s2.require if it is set. Otherwise import require.s2 from
       the s2 home directory, which is taken from the 's2.home' CLI
       flag or, failing that, the S2_HOME environment variable. The
       affirm fails if neither yields a string. */
    const R = s2.require ||| scope {
        affirm typeinfo(isstring const home = cliFlags['s2.home'] ||| s2.getenv('S2_HOME'));
        import(false, home + '/require.d/require.s2');
    };
    affirm typeinfo(iscontainer R);
    /* R is called with a list of module names. Element 0 of its
       result is stored as the sqlite3 module. */
    s2.sqlite3 = R(['s2mod!sqlite3']).0;
}

/**
   Application state and helpers: the sqlite3 module, db configuration,
   a lazily-opened db handle, the CLI flags and file list, and the
   routine which inserts one file into the FTS db.
*/
const app = {
    /** The sqlite3 module, used by db() as a constructor. */
    sqlite3: s2.sqlite3,
    config:{
        /** Db file name: the 'db-file' CLI flag if set, else
            fts-pages.sqlite3 appended to __FILEDIR. */
        dbFile: cliFlags['db-file'] ||| __FILEDIR+'fts-pages.sqlite3',
        /** Passed as the 2nd argument to the sqlite3 constructor.
            Presumably 'w' means "writable" - confirm against the
            sqlite3 module's docs. */
        dbOpenFlags: 'w'
    },
    /**
       Returns the db handle, opening it on the first call. The handle
       is cached in this function's own $ property, so later calls
       return the same instance.
    */
    db: proc x(){
        x.$ && return x.$;
        affirm typeinfo(isobject this.sqlite3);
        return x.$ = new this.sqlite3(this.config.dbFile,
                                      this.config.dbOpenFlags);
    },
    /**
       The CLI flags. This uses the normalized cliFlags object (the
       same one config.dbFile reads from) rather than s2.ARGV.flags
       directly, so that it is always an object, even if no flags
       object was provided.
    */
    flags: cliFlags,
    /** The list of files to process. */
    flist: flist,
    /**
       Inserts file f into the FTS database. u may be the URI for the
       file, and defaults to the directory part of f.

       The file's mtime (via stat) and its full contents are bound,
       along with u, to a freshly-prepared insert statement which is
       finalized after it is executed.
    */
    insert:proc(f,u){
        if(!u){
            /* Derive the URI from f: drop the trailing file name
               part and prepend a slash. */
            var a = f.split('/'); a.pop();
            u = '/'+a.join('/');
        }
        this.db().prepare(I).exec(S(f).mtime, u, B(f)).finalize();
    } using {
        /** Noting that the mtime we use with this statement always
            uses dir-content.html instead of dir-content.s2.html
            (which would arguably be more correct but those files
            aren't any good for indexing). */
        I: "insert into page(mtime, uri, content) values(?,?,?)",
        /** Reads a file's contents. */
        B: s2.Buffer.readFile,
        /** stat()s a file. insert() uses the mtime member of its
            result. */
        S: s2.fs.stat
        /**
           Potential TODO: check for the existence of a modified form
           of filenames (s/\.html/.s2.html/) and if it exists then use
           it for timestamping, rather than f. Alternately, allow the
           passing-in of the timestamp from the CLI, as that would
           allow us to feed in the timestamps fossil knows (the time
           of the last commit the file was in). (Fossil's touch command,
           added since that was written, obviates that need.)
        */
    }
};

/*affirm typeinfo(isstring const infile = app.flags['i']);
affirm typeinfo(isstring const uri = (app.flags['uri'] ||| scope{
    const a = infile.split('/');
    a.pop();
    '/'+a.join('/');
}));
*/

/* verbose() passes its arguments on to print() if the 'v' flag is
   set, else it is a no-op. */
const verbose = !app.flags.v
      ? proc(){}
      : proc(){ return print.apply(this, argv) } using(print);

/* Insert every listed file inside a single db transaction. Each
   file's URI is the directory part of its name, with a leading
   slash. */
app.db().transaction(proc() using(app,verbose){
    foreach(@app.flist=>arg){
        /* Strip any leading "./" from the file name. */
        const infile = 0===arg.indexOf('./') ? arg.substr(2) : arg;
        const dirParts = infile.split('/');
        dirParts.pop();
        const uri = '/'+dirParts.join('/');
        verbose('infile:',infile,'\nuri:', uri);
        app.insert(infile, uri);
    }
});
/* Optionally vacuum the db, if the 'vacuum' flag is set. */
if(app.flags['vacuum']) app.db().exec("vacuum");
verbose("Done!");