libfossil  SQLSnippets

SQL Snippets

This page hosts a collection of SQL snippets which have proven useful when tinkering file fossil. More can be found in the source tree - see the files named q-*.sql.

List of files changed by a given commit UUID:

To get just the names of files modified in a given RID:

SELECT name FROM filename f, mlink m
WHERE m.mid=3242 -- checkin RID value
AND m.fnid=f.fnid;

To get more info...

SELECT
    bf.uuid,
    filename.name fname,
    bf.size
FROM mlink, filename,
    blob bf, -- FILE blob
    blob bm -- MANIFEST/checkin blob
WHERE 
bm.uuid glob '6b581c89f90c*'
-- resp: bm.rid=rid_of_checkin
AND filename.fnid=mlink.fnid
AND bf.rid=mlink.fid
AND bm.rid=mlink.mid
;

To get timestamp info (same value for all rows) we need the "event" table:

SELECT
    bf.uuid,
    filename.name fname,
    bf.size,
    datetime(event.mtime)
FROM mlink, filename, event,
    blob bf, -- FILE blob
    blob bm -- MANIFEST/checkin blob
WHERE 
bm.uuid glob '6b581c89f90c*'
-- resp: bm.rid=rid_of_checkin
AND filename.fnid=mlink.fnid
AND event.objid=mlink.mid
AND bf.rid=mlink.fid
AND bm.rid=mlink.mid
;

All ancestors of a given checkin

My first "WITH RECURSIVE" statement, adapted from one written by drh, lists information about the ancestry of a given commit...

-- All ancestors (direct or merged!) of the checkin
-- RID given in the first SELECT...
WITH RECURSIVE
  origin(rid) AS(
    SELECT 3285 -- origin RID
  ),
  ancestors(rid,uuid,tm,user,comment) AS (
     -- Change this checkin RID:
     SELECT b.rid, b.uuid, e.mtime, e.user,
            coalesce(e.ecomment,e.comment)
        FROM blob b, event e, origin
        WHERE b.rid=origin.rid and e.objid=b.rid
     UNION ALL
     SELECT p.pid, b.uuid, e.mtime, e.user,
            coalesce(e.ecomment,e.comment)
     FROM plink p, blob b, ancestors a, event e
        WHERE p.cid=b.rid
        AND p.cid=a.rid
        AND e.objid=p.pid
 )
SELECT a.rid,
       substr(a.uuid,0,8) uuid,
       datetime(a.tm,'localtime') time,
       user,
       substr(comment,0,20)||'...' comment
from ancestors a
-- Optionally limit it to the first N
-- ancestors (including the original checkin):
     LIMIT 6
;

Example output:

rid	uuid	time	user	comment
3285	084c05a	2014-02-03 20:18:07	stephan	disabled auto-updat...
3302	084c05a	2014-02-03 18:34:27	stephan	x64/i32 compatibili...
3292	782894e	2014-02-03 14:01:47	stephan	Removed check for s...
3286	4e9d087	2014-02-03 11:33:29	stephan	minor build/portabi...
3257	249ad02	2014-02-02 19:15:24	stephan	minor doc update....
3255	3c03462	2014-02-02 19:10:20	stephan	Added unit test for...

(Sidebar: that same info is all available in the event table, so we could change the recursion to only fetch the RIDs, then join that data to the event table to get the same info.)

A very similar query, which could possibly be simplified by one wiser in the ways of SQL than myself, which uses a (Julian) time to limit how far back the ancestry check goes:

WITH RECURSIVE
  ancestors(rid,uuid,tm) AS (
     -- Change this checkin RID:
     SELECT b.rid, b.uuid, e.mtime FROM blob b, event e
        WHERE b.rid=3279 and e.objid=b.rid
     UNION ALL
     SELECT p.pid, b.uuid, e.mtime
     FROM plink p, blob b, ancestors a, event e
        WHERE p.cid=b.rid
        AND p.cid=a.rid
        AND e.objid=p.pid
        AND e.mtime > 2456691.31809243 -- mtime of RID 3279 minus 1 day
 )
SELECT b.rid,
       substr(b.uuid,0,8) uuid,
       datetime(a.tm,'localtime') time
from blob b, ancestors a
WHERE b.rid=a.rid

Here's a more refined variant of the above, applying what i learned during an evening or two of tinkering with it...

-- All ancestors (direct or merged!) of the checkin
-- RID given in the first SELECT...
WITH RECURSIVE
-- Change config.rid and config.cutoffTime to change the query's behaviour.
-- rid is the starting version
-- cutoffTime is a Julian Day limit when searching for older
-- versions, based on rid's checkin time. If cutoffTime is 0
-- or less then no time limit is applied.
  config(rid,cutoffTime) AS(

-- Most recent commit:
    SELECT (SELECT MAX(objid) FROM event ORDER BY mtime DESC) rid,
-- SELECT 3285 as rid
-- Julian day limit to go back in time. <=0 means unlimited:
   1.5 cutoffTime
-- No limit:
--    0
  ),
  origin(rid, mtime,cutoffTime) AS(
    -- origin RID
    SELECT b.rid as rid,
           e.mtime as mtime,
           config.cutoffTime cutoffTime
    FROM blob b, event e, config
    WHERE
    b.rid=config.rid
    AND e.objid=b.rid
  ),
  ancestors(rid,uuid,tm,user,comment) AS (
     SELECT b.rid, b.uuid, e.mtime, e.user,
            coalesce(e.ecomment,e.comment)
        FROM blob b, event e, origin
        WHERE b.rid=origin.rid and e.objid=b.rid
     UNION ALL
     SELECT p.pid, b.uuid, e.mtime, e.user,
            coalesce(e.ecomment,e.comment)
     FROM plink p, blob b,
          ancestors a, event e,
          origin
        WHERE p.cid=b.rid
        AND p.cid=a.rid
        AND e.objid=p.pid
-- Only trace back this far in time (Julian Days)...
-- Replace set config.cutoffTime to 0 to go back any amount of time
-- (and be prepared to add a LIMIT unless you want a long,
-- long list).
        AND CASE WHEN origin.cutoffTime>0 THEN e.mtime >= (origin.mtime - origin.cutoffTime) ELSE 1 END
 )
SELECT a.rid,
       substr(a.uuid,0,8) uuid,
       datetime(a.tm,'localtime') time,
       user,
       substr(comment,0,20)||'...' comment
from ancestors a
-- Optionally limit it to the first N
-- ancestors (including the original checkin):
-- LIMIT 25
-- A config.cutoffTime time can prevent it from going too far back
;

Checkout-vs-repo Overview

An overview of info similar to that provided by fossil changes and similar commands:

SELECT
        id,vid, mrid, deleted,
        chnged,
        datetime(mtime,'unixepoch','localtime') as local_time,
        size, uuid, origname, pathname
FROM vfile LEFT JOIN blob ON vfile.mrid=blob.rid
WHERE vid=(SELECT value FROM vvar WHERE name='checkout')
AND chnged
ORDER BY pathname;

Ancestry of a File

An overview of info the ancestry of a file:

SELECT
b.uuid as manifestUuid,
datetime(p.mtime) as manifestTime,
--       ml.*,
b.size AS manifestSize,
fn.name AS filename
FROM mlink ml, filename fn, blob b, plink p
WHERE ml.fnid=fn.fnid
AND fn.name='f-timeline.c' -- CHANGE THIS NAME
AND ml.mid=b.rid
AND p.cid=ml.mid
ORDER BY manifestTime DESC;

Not yet sure how it behaves across branches, and it certainly doesn't track renames.

Latest Checkin Info for a Given Branch

This can probably be done more simply, but here's an approach which uses a CTE to select a list of branch names:

WITH branch(name) AS (
  SELECT 'dave'
  UNION ALL
  SELECT 'trunk'
),
latestByBranch(name, rid, uuid, ts) AS(
     SELECT branch.name, event.objid, blob.uuid, datetime(max(event.mtime))
     FROM tag, tagxref, event, branch, blob
     WHERE tag.tagname='sym-' || branch.name
     AND tagxref.tagid=tag.tagid
     AND tagxref.tagtype>0
     AND event.objid=tagxref.rid
     AND blob.rid=event.objid
     AND event.type GLOB 'ci'
     GROUP BY branch.name
)
SELECT * FROM latestByBranch
ORDER BY ts DESC
;

Recursion Example

See also: http://www.sqlite.org/draft/lang_with.html

The following is adapted from a sqlite mailing list post by Petite Abeille on 20140203, not directly applicable to fossil but which is an interesting example nonetheless and which can certainly be used as a model for generating fossil-related data:

with
DataSet(node,parent)
as
(
  select  'A', null union all
  select  'F', null union all
  select  'B', 'A' union all
  select  'C', 'B' union all
  select  'E', 'F' union all
  select  'H', 'E' union all
  select  'G', 'E' union all
  select  'D', 'C'
),
Hierarchy( node, parent, level, path, isHead, isTail )
as
(
  select  DataSet.node as node,
          'NULL' as parent, -- DataSet.parent,
          0 as level,
          ' → ' || DataSet.node as path,
          DataSet.parent IS NULL,
          NOT EXISTS (SELECT 1 FROM DataSet d WHERE d.parent=DataSet.node)
  from     DataSet
  where    DataSet.parent is null

  union all
  select  DataSet.node as node,
          DataSet.parent as parent,
          Hierarchy.level + 1 as level,
          Hierarchy.path || ' → ' || DataSet.node as path,
          DataSet.parent IS NULL,
          NOT EXISTS (SELECT 1 FROM DataSet d WHERE d.parent=DataSet.node)
  from    Hierarchy
  join    DataSet
  on      DataSet.parent = Hierarchy.node
  ORDER BY parent, node
)
select    *
from      Hierarchy
-- where level>1
order by  path;