################################################################################
# This is the first try to provide access to tar-files via
# the vfs-mechanism.
# This file is copied and adapted from zipvfs.tcl
# (and ftpvfs.tcl). The internal structure for the tar-data is stored
# analog to zipvfs so that many functions can be the same as in zipvfs.
#
# Jan 13 2003: Stefan Vogel (stefan.vogel@avinci.de)
# (reformatted to tabsize 8 by Vince).
#
# May 31 2024: chw
# Add gzip support.
#
# TODOs:
# * add writable access (should be easy with tar-files)
# * more testing :-(
################################################################################

package require vfs
package provide vfs::tar 0.91

# Using the vfs, memchan and Trf extensions, we're able
# to write a Tcl-only tar filesystem.

namespace eval vfs::tar {}

proc vfs::tar::Mount {tarfile local} {
    if {[string first "tar://" $tarfile] == 0} {
	set tarfile [string range $tarfile 6 end]
    }
    set fd [vfs::tar::_open [::file normalize $tarfile]]
    if {[file pathtype $local] ne "absolute"} {
	set local [file normalize $local]
    }
    vfs::filesystem mount $local [list ::vfs::tar::handler $fd]
    # Register command to unmount
    vfs::RegisterMount $local [list ::vfs::tar::Unmount $fd]
    return $fd
}

proc vfs::tar::Unmount {fd local} {
    vfs::filesystem unmount $local
    vfs::tar::_close $fd
}

proc vfs::tar::handler {tarfd cmd root relative actualpath args} {
    if {$cmd eq "matchindirectory"} {
	# e.g. called from "glob *"
	eval [list $cmd $tarfd $relative $actualpath] $args
    } else {
	# called for all other commands: access, stat
	eval [list $cmd $tarfd $relative] $args
    }
}

proc vfs::tar::attributes {tarfd} {
    return [list "state"]
}

proc vfs::tar::state {tarfd args} {
    vfs::attributeCantConfigure "state" "readonly" $args
}

# If we implement the commands below, we will have a perfect
# virtual file system for tar files.
# Completely copied from zipvfs.tcl

proc vfs::tar::matchindirectory {tarfd path actualpath pattern type} {
    # This call to vfs::tar::_getdir handles empty patterns properly as asking
    # for the existence of a single file $path only
    set res [vfs::tar::_getdir $tarfd $path $pattern]
    if {![string length $pattern]} {
	if {![vfs::tar::_exists $tarfd $path]} {
	    return {}
	}
	set res [list $actualpath]
	set actualpath ""
    }

    set newres [list]
    foreach p [::vfs::matchCorrectTypes $type $res $actualpath] {
	lappend newres [file join $actualpath $p]
    }
    return $newres
}

# Return the necessary "array"
proc vfs::tar::stat {tarfd name} {
    vfs::tar::_stat $tarfd $name sb
    unset -nocomplain sb(gzip)
    array get sb
}

proc vfs::tar::access {tarfd name mode} {
    if {$mode & 2} {
	vfs::filesystem posixerror $::vfs::posix(EROFS)
    }
    # Readable, Exists and Executable are treated as 'exists'
    # Could we get more information from the archive?
    if {[vfs::tar::_exists $tarfd $name]} {
	return 1
    } else {
	error "No such file"
    }
}

proc vfs::tar::open {tarfd name mode permissions} {
    # Return a list of two elements:
    # 1. first element is the Tcl channel name which has been opened
    # 2. second element (optional) is a command to evaluate when
    #    the channel is closed.

    switch -- $mode {
	"" -
	"r" {
	    if {![vfs::tar::_exists $tarfd $name]} {
		vfs::filesystem posixerror $::vfs::posix(ENOENT)
	    }

	    vfs::tar::_stat $tarfd $name sb

	    set nfd [vfs::memchan]
	    fconfigure $nfd -translation binary

	    # get the starting point from structure
	    if {$sb(gzip)} {
		seek $tarfd 0
		zlib push gunzip $tarfd
		fconfigure $tarfd -translation binary
		_zseek $tarfd $sb(ino)
	    } else {
		seek $tarfd $sb(ino)
	    }
	    vfs::tar::_data $tarfd sb data
	    if {$sb(gzip)} {
		chan pop $tarfd
		seek $tarfd 0
	    }

	    puts -nonewline $nfd $data

	    fconfigure $nfd -translation auto
	    seek $nfd 0
	    return [list $nfd]
	}
	default {
	    vfs::filesystem posixerror $::vfs::posix(EROFS)
	}
    }
}

proc vfs::tar::createdirectory {tarfd name} {
    vfs::filesystem posixerror $::vfs::posix(EROFS)
}

proc vfs::tar::removedirectory {tarfd name recursive} {
    #::vfs::log "removedirectory $name"
    vfs::filesystem posixerror $::vfs::posix(EROFS)
}

proc vfs::tar::deletefile {tarfd name} {
    vfs::filesystem posixerror $::vfs::posix(EROFS)
}

# Don't care about platform-specific attributes
proc vfs::tar::fileattributes {tarfd name args} {
    #::vfs::log "fileattributes $args"
    switch -- [llength $args] {
	0 {
	    # list strings
	    return [list]
	}
	1 {
	    # get value
	    set index [lindex $args 0]
	    return ""
	}
	2 {
	    # set value
	    set index [lindex $args 0]
	    set val [lindex $args 1]
	    vfs::filesystem posixerror $::vfs::posix(EROFS)
	}
    }
}

# Set the 'mtime' of a file.
proc vfs::tar::utime {fd path actime mtime} {
    vfs::filesystem posixerror $::vfs::posix(EROFS)
}

#
# tar decoder:
#
# Format of tar file:
# see http://www.gnu.org/manual/tar/html_node/tar_123.html
# "comments" are put into the the arrays for readability
# the fields in aPosixHeader are stored inside a
# 512-byte-block. Not all header-fields are used here.
#
# Here are some excerpts from the above resource for information
# only:
#
# name, linkname, magic, uname, and gname are null-terminated strings.
# All other fileds are zero-filled octal numbers in ASCII.
# Each numeric field of width w contains
#   w minus 2 digits, a space, and a null,
#   except size, and mtime, which do not contain the trailing null

# mtime field is the modification time of the file at the time it was
# archived. It is the ASCII representation of the octal value of the
# last time the file was modified, represented as an integer number
# of seconds since January 1, 1970, 00:00 Coordinated Universal Time


namespace eval vfs::tar {
    set HEADER_SIZE 500
    set BLOCK_SIZE 512

    # Fields of header with start/end-index in "comments": length of
    # field in bytes (just for documentation) prefix is the
    # "datatype": s == null-terminated string o == zero-filled octal
    # number (numeric but leave it octal e.g mode) n == numeric -->
    # integer change to decimal) "not used" is marked when the field
    # is not needed anywhere here
    array set aPosixHeader {
	name      {s 0    99}     # 100
	mode      {o 100 107}     # "8   - not used now"
	uid       {n 108 115}     # 8
	gid       {n 116 123}     # 8
	size      {n 124 135}     # 12
	mtime     {n 136 147}     # 12
	chksum    {o 148 155}     # "8   - not used"
	typeflag  {o 156 156}     # 1
	linkname  {s 157 256}     # "100 - not used"
	magic     {s 257 262}     # "6   - not used"
	version   {o 263 264}     # "2   - not used"
	uname     {s 265 296}     # "32  - not used"
	gname     {s 297 328}     # "32  - not used"
	devmajor  {o 329 336}     # "8   - not used"
	devminor  {o 337 344}     # "8   - not used"
	prefix    {o 345 499}     # "155 - not used"
    }

    # Just for compatibility with posix-header
    # only DIRTYPE is used
    array set aTypeFlag {
	REGTYPE  0            # "regular file"
	AREGTYPE \000         # "regular file"
	LNKTYPE  1            # link
	SYMTYPE  2            # reserved
	CHRTYPE  3            # "character special"
	BLKTYPE  4            # "block special"
	DIRTYPE  5            # directory
	FIFOTYPE 6            # "FIFO special"
	CONTTYPE 7            # reserved
    }
}

proc vfs::tar::_data {fd arr {varPtr ""}} {
    upvar 1 $arr sb

    if {$varPtr eq ""} {
	# CAUTION: Will fail for zlib channel
	seek $fd $sb(size) current
    } else {
	upvar 1 $varPtr data
	set data [read $fd $sb(size)]
    }
}

proc vfs::tar::TOC {fd arr toc} {
    variable aPosixHeader
    variable aTypeFlag
    variable HEADER_SIZE
    variable BLOCK_SIZE

    upvar 1 $arr sb
    upvar 1 $toc _toc

    set gzip 0
    set pos 0
    set oldpos 0
    set sb(nitems) 0

    if {[info command zlib] eq "zlib"} {
	set hdr [read $fd 2]
	seek $fd 0
	if {$hdr eq "\x1f\x8b"} {
	    zlib push gunzip $fd
	    fconfigure $fd -translation binary
	    incr gzip
	}
    }

    # Loop through file in blocks of BLOCK_SIZE
    while {![eof $fd]} {
	if {$gzip} {
	    _zseek $fd [expr {$pos - $oldpos}]
	    set oldpos $pos
	} else {
	    seek $fd $pos
	}
	set hdr [read $fd $BLOCK_SIZE]
	incr oldpos $BLOCK_SIZE

	# Read header-fields from block (see aPosixHeader)
	foreach key {name typeflag size mtime uid gid} {
	    set type [lindex $aPosixHeader($key) 0]
	    set positions [lrange $aPosixHeader($key) 1 2]
	    switch $type {
		s {
		    set $key [eval [list string range $hdr] $positions]
		    # Cut the trailing Nulls
		    set $key [string range [set $key] 0 \
			[expr {[string first "\000" [set $key]] - 1}]]
		}
		o {
		    # Leave it as is (octal value)
		    set $key [eval [list string range $hdr] $positions]
		}
		n {
		    set $key [eval [list string range $hdr] $positions]
		    # Change to integer
		    scan [set $key] "%o" $key
		    # If not set, set default-value "0"
		    # (size == "" is not a very good value)
		    if {![string is integer [set $key]] || [set $key] eq ""} {
			set $key 0
		    }
		}
		default {
		    error "tar::TOC: '$fd' wrong type for header-field: '$type'"
		}
	    }
	}

	# Only the last three octals are interesting for mode
	# ignore mode now, should this be added??
	# set mode 0[string range $mode end-3 end]

	# Get the increment to the next valid block
	# (ignore file-blocks in between)
	# if size == 0 the minimum incr is 512
	set incr [expr {int(ceil($size/double($BLOCK_SIZE)))*$BLOCK_SIZE+$BLOCK_SIZE}]

	set startPosition [expr {$pos+$BLOCK_SIZE}]
	# Make it relative to this working-directory, remove the
	# leading "relative"-paths
	regexp -- {^(?:\.\.?/)*/?(.*)} $name -> name

	if {$name ne ""} {
	    incr sb(nitems)
	    set sb($name,ino) [expr {$pos+$BLOCK_SIZE}]
	    set sb($name,size) $size
	    set type "file"
	    # The mode should be 0777?? or must be changed to decimal?
	    if {$typeflag == $aTypeFlag(DIRTYPE)} {
		# Directory, append this without /
		# leave mode: 0777
		# (else we might not be able to walk through archive)
		set type "directory"
		lappend _toc([string trimright $name "/"]) \
		    name [string trimright $name "/"] \
		    type $type mtime $mtime size $size mode 0777 \
		    ino $startPosition \
		    depth [llength [file split $name]] \
		    uid $uid gid $gid
	    }
	    lappend _toc($name) \
		name $name \
		type $type mtime $mtime size $size mode 0777 \
		ino $startPosition depth [llength [file split $name]] \
		uid $uid gid $gid gzip $gzip
	}
	incr pos $incr
    }
    if {$gzip} {
	chan pop $fd
	seek $fd 0
    }
    return
}

proc vfs::tar::_open {path} {
    set fd [::open $path]

    if {[catch {
	upvar #0 vfs::tar::$fd.toc toc
	fconfigure $fd -translation binary ;#-buffering none
	vfs::tar::TOC $fd sb toc
    } err]} {
	close $fd
	return -code error $err
    }

    return $fd
}

proc vfs::tar::_exists {fd path} {
    #::vfs::log "$fd $path"
    if {$path eq ""} {
	return 1
    } else {
	upvar #0 vfs::tar::$fd.toc toc
	return [expr {[info exists toc($path)] || [info exists toc([string trimright $path "/"]/)]}]
    }
}

proc vfs::tar::_stat {fd path arr} {
    upvar #0 vfs::tar::$fd.toc toc
    upvar 1 $arr sb

    if {$path eq "" || $path eq "."} {
	array set sb {
	    type directory mtime 0 size 0 mode 0777
	    ino -1 depth 0 name ""
	}
    } elseif {![info exists toc($path)] } {
	return -code error "could not read \"$path\": no such file or directory"
    } else {
	array set sb $toc($path)
    }

    # Set missing attributes
    set sb(dev) -1
    set sb(nlink) 1
    set sb(atime) $sb(mtime)
    set sb(ctime) $sb(mtime)

    return ""
}

# Treats empty pattern as asking for a particular file only.
# Directly copied from zipvfs.
proc vfs::tar::_getdir {fd path {pat *}} {
    upvar #0 vfs::tar::$fd.toc toc

    if {$path eq "." || $path eq ""} {
	set path $pat
    } else {
	set path [string tolower $path]
	if {$pat ne ""} {
	    append path /$pat
	}
    }
    set depth [llength [file split $path]]

    if {$depth} {
	set ret {}
	foreach key [array names toc $path] {
	    if {[string index $key end] eq "/"} {
		# Directories are listed twice: both with and without
		# the trailing '/', so we ignore the one with
		continue
	    }
	    array set sb $toc($key)

	    if {$sb(depth) == $depth} {
		if {[info exists toc(${key}/)]} {
		    array set sb $toc(${key}/)
		}
		# Remove sb(name) (because == $key)
		lappend ret [file tail $key]
	    }
	    unset sb
	}
	return $ret
    } else {
	# Just the 'root' of the zip archive.  This obviously exists and
	# is a directory.
	return [list {}]
    }
}

proc vfs::tar::_zseek {fd spos} {
    set pos 0
    set toread 10240
    while {$pos < $spos} {
	if {$spos - $pos < $toread} {
	    set toread [expr {$spos - $pos}]
	}
	if {[eof $fd]} {
	    return -code error "unexpected EOF"
	}
	set nread [string length [read $fd $toread]]
	if {$nread == 0} {
	    break
	}
	incr pos $nread
    }
    return $pos
}

proc vfs::tar::_close {fd} {
    variable $fd.toc
    unset -nocomplain $fd.toc
    ::close $fd
}
