#!/usr/local/bin/perl # # This is a CGI script to serve HTTPFS Requests # # The HTTPFS requests come from a remote HTTPFS client, which can be # an HTTPFS client framework built into an application, an adapter between # HTTPFS and Midnight Commander's (MC) MCFS, or any other HTTP user agent # (including a web browser or Wget). The purpose of this script is # to list directories on the present computer, send out files, and # accept data for new or existing files. # This script acts effectively as an "NFS daemon" which "exports", well, # the entire file system of this hosts (or a part of it). # # This script is supposed to be called in response to a HEAD, GET, PUT, or # DELETE action. Upon receiving this kind of request, an HHTP server parses # it and calls the present script, passing request headers as env variables, # and the PUT message as this script's standard input. # # Thus this script should receive the following environment # # PATH=/sbin:/usr/sbin:/usr/bin # REMOTE_HOST=10.1.1.1 # HTTP_HOST=10.1.10.1:80 # GATEWAY_INTERFACE=CGI/1.1 # SERVER_SOFTWARE=Netscape-Communications/1.12 # SERVER_URL=http://day-1 # REQUEST_METHOD=PUT # SERVER_NAME=day-1 # HTTP_USER_AGENT=VFS-client/1.1 # SCRIPT_NAME=/cgi-bin/admin/MCHFS-server.pl # SERVER_PORT=80 # SERVER_PROTOCOL=HTTP/1.0 # HTTP_PRAGMA=httpfs="lstat" # REMOTE_ADDR=10.1.1.1 # TZ=GMT0 # PATH_INFO=/aaa/bbb/ccc/ # PATH_TRANSLATED=/w/data/aaa/bbb/ccc/ # # Of interest (importance) to us are REQUEST_METHOD. HTTP_USER_AGENT tells # the name and the version of the agent, just for reference. # PATH_INFO tells the path to the file/directory of interest to the client; # PATH_TRANSLATED is the same path with the server's DocumentRoot # prepended. HTTP_PRAGMA may contain an httpfs command that specializes # the request (for example, to distinguish a 'stat' from a 'lstat' request). # When the request method is PUT, CONTENT_TYPE and CONTENT_LENGTH env # variables _must_ be present, to tell the message size and data format. # # A file/directory path a HTTPFS client specifies is normally a relative path # from this HTTP server's DocumentRoot. For example, if an HTTP server was # configured with DocumentRoot=/w/data, and a client makes a request # GET /cgi-bin/admin/MCHFS-server.pl/images/foo # then the client actually refers to a file /w/data/images/foo. # However, the HTTPFS client may specify a file by its _absolute_ path # on this server: # GET /cgi-bin/admin/MCHFS-server.pl/DeepestRoot/etc/passwd # really refers to a '/etc/passwd' # # # This MCHFS-server.pl script operates in one of the several modes: # # Inquiry: request method HEAD # # 1.1) Inquire of a resource (file or directory) status: # HEAD /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/dirn/file # Pragma: httpfs="stat" # The script returns the status of a specified file or directory in a # ETag: response header # ETag: '"' status-info '"' # where status-info is a string of 11 numbers separated by a single # space: dev ino mode nlink uid gid size atime mtime ctime blocks # where # dev device number of filesystem # ino inode number # mode file mode (type and permissions) # nlink number of (hard) links to the file # uid numeric user ID of file's owner # gid numeric group ID of file's owner # size total size of file, in bytes # atime last access time since the epoch # mtime last modify time since the epoch # ctime inode change time (NOT creation time!) since the epoch # blocks actual number of blocks allocated # all numbers but 'mode' are decimal numbers; mode is an octal number. # see struct stat and 'stat' entry in 'man perlfunc' # # Note the status-info is a "hard validator" - a unique # representation of a resource, file or directory. Indeed, should # a file gets modified, at least its modification timestamp will # change. That's why status-info is being delivered in a ETag, a field # which is designated by the HTTP standard to carry resource identifiers. # # 1.2) Inquire of a 'raw' status of a resource (file or directory) : # HEAD /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/dirn/file # Pragma: httpfs="lstat" # or Pragma omitted # See 1) above; the only difference that a lstat() of a file # is obtained. # # Requesting a resource: GET method # Most of the actions below heed a 'HTTP_IF_MODIFIED_SINCE' request header. # If the header is present and the requested resource was not modified since # the specified time, this script replies '304 Not Modified' and sends # no content. # # 2.1) Listing of a directory: # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/dirn/ # The script should return the contents of a directory # $DocumentRoot/dir1/dir2/dirn # or /dir2/dirn (if dir1 is "DeepestRoot") # The directory in question must exist and be accessible by this script. # The server returns the listing of the directory, a text/plain entity: # for each directory entry (including . and ..) the server writes a line # name/status-info # see above for status-info # The server also sets the Last-Modified: response header to the # modification timestamp of the directory. # # 2.2) Delivery of a file: # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # The script should send the contents of a file # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # If the file exists and is readable by this server, it sends the file # content as it is, tagged with an "application/octet-stream" MIME type. # # 2.3) Reading the content of a symbolic link: # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # Pragma: httpfs="readlink" # The script should send the content (a resolved target name) of a symbolic link # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # If the file is indeed a symbolic link and can be successfully resolved # by this server, it sends the resolved name as the reply content (terminated # with \n), tagged with a "text/plain" MIME type. # # 2.3) "Opening" of a file and sending out the existing content # (if necessary) # GET /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # Pragma: httpfs="preopen-xxxx" # where four characters xxxx in the preopen-xxxx pragma above have the following # meaning: # 1. char: 'R' if the file is being open for reading, '-' otherwise # 2. char: 'W' if the file is being open for writing, # 'A' if the file is being open for appending, # '-' otherwise # 3. char: 'C' if the file is being open with an O_CREAT flag, # 'X' if the file is being open with both O_CREAT and O_EXCL flag, # '-' otherwise # 4. char: 'T' if the file is being open with an O_TRUNC flag, # '-' otherwise # This script checks a file # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # and: # - the file does not exist and O_CREAT is not set: return an error # "404 File not found" # - the file does not exist and O_CREAT is set: create a file with a given # name with a zero size. Return "201 Created" if successful, # "403 Forbidden" if the creation fails. The file is created under the # effective GID and UID of this script, with permissions as granted by # the current umask. # - if the file exists and both O_CREAT and O_EXCL are set, return an # error "403 File already exists" # - if the file exists and O_TRUNC is set, the file is truncated to zero # length. Return "201 Truncated" if successful, or "403 Forbidden" # if the truncation fails. # - If the file is being open for writing or appending, check the permission # of the file. If the file does not permit modifications, send an error # "403 Forbidden" # - otherwise, send the contents of the file, if it wasn't modified # after a specified time, just as in option 2.2 above. # # Modifying/creating a resource: PUT method # Altering contents of a file, creating a new file: # PUT /cgi-bin/admin/MCHFS-server.pl/dir1/dir2/filename # The script should write the submitted data to a file # $DocumentRoot/dir1/dir2/filename # or /dir2/filename (if dir1 is "DeepestRoot") # The file is created if needed. The script must have permissions to # write into the file (or create it). The client was supposed to # send the data for the file as a part of its request. The data should # be tagged with an "application/octet-stream" MIME type. This HTTPFS server # responds in "201 Created" HTTP code, or in one of the error codes. # # Deleting a resource: DELETE method # # Security consideration: # This script obviously opens up the file system of the host computer to # the entire world. If the HTTP daemon runs as a root, then any remote # HTTPFS client can do absolutely anything with this host's files. Clearly this # is not often desirable. Therefore, one may limit access to this script # (via .htaccess or conf/access.conf) to trusted hosts/users, and/or demand # authentication (in a regular HTTP way, by a restrict directive in a # .htaccess file). Note that the access restrictions above are the # responsibility of the httpd; this script doesn't need to do anything about # them (and doesn't even need to be aware of them). # # In addition, this HTTPFS server script may implement its own access # control. For example, it may refuse PUT requests. This effectively makes # exported file systems read-only. The MCHFS-server.pl script may allow # modification/listing of only certain files (based on whatever criteria). # The script may disallow DeepestRoot and ".." in the file paths, thus # restricting user access only to a specific part of the file system tree. # The possibilities are endless. # # This script can also be optimized to take advantage of HTTP/1.1 features: # it may consider conditional range requests (IF_RANGE) and # send out only requested parts of a file (rather than the whole file), # taking advantage of a RANGE: HTTP header. # # # $Id: MCHFS-server.pl,v 3.3 1999/05/21 17:57:04 oleg Exp oleg $ # Tell the client off (with a status in $1 and reason in string $2) # Status may be omitted sub bail_out { my $status = @_ > 1 ? shift : "400 Bad Request"; print "Status: $status\n"; print "Content-type: text/html\n\n"; # Two \n\n terminate the headers print "
$_[0] \n";
exit 4
}
# Put file $1
sub put_file {
my $file_path = shift;
my $to_read = $ENV{CONTENT_LENGTH}
|| bail_out "411 Length required", "CONTENT_LENGTH is missing";
$ENV{CONTENT_TYPE} eq "application/octet-stream"
|| bail_out "PUT message must be of a type application/octet-stream";
open(PUT_FILE,">$file_path")
|| bail_out "403 Forbidden", "$file_path is not writable: $!";
my $chunk_size = 1024; my $chunk;
binmode STDIN; binmode PUT_FILE;
while ( $to_read > 0 &&
($read_res = sysread STDIN,$chunk,
($chunk_size < $to_read ? $chunk_size : $to_read))) {
syswrite PUT_FILE,$chunk,$read_res
|| bail_out "$file_path write error $!";
$to_read -= $read_res;
}
$to_read == 0 || bail_out "500 Read Error", "Failed to read the input completely";
close PUT_FILE;
my @stat_res = lstat($file_path);
@stat_res || bail_out "404 Not found or accessible",
"$file_path not found or unreadable: $!";
print "Status: 201 Created $file_path\n";
print "ETag: \""; print format_stat_info(@stat_res); print "\"\n\n";
exit 0
}
# Format the the list returned by a stat or lstat command
sub format_stat_info {
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks) = @_;
return sprintf "%d %d %o %d %d %d %d %d %d %d %d",
$dev,$ino,$mode,$nlink,$uid,$gid,$size,
$atime,$mtime,$ctime,$blocks;
}
# Inquiry of a file $1
sub inquiry_file {
my $file_path = shift;
my $sub_command = $ENV{HTTP_PRAGMA};
$sub_command =~ s/httpfs=\"(\w+)\"/$1/;
my @stat_res = ($sub_command eq "stat") ? stat($file_path) :
( not $sub_command or $sub_command eq "lstat" ) ? lstat($file_path) :
bail_out "Invalid pragma httpfs command $sub_command";
@stat_res || bail_out "404 Not found or accessible",
"$file_path not found or unreadable: $!";
print "ETag: \""; print format_stat_info(@stat_res); print "\"\n\n";
exit 0;
}
# Print a Last-Modified: header corresponding to the epoch timestamp $1
# e.g., Last-modified: Sun, 06 Nov 1994 08:49:37 GMT
sub print_last_modif {
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
gmtime(shift);
printf "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\n",
("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")[$wday],
$mday,
("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec")[$mon], 1900+$year,
$hour,$min,$sec;
}
# Resolve the symbolic link given as $1
sub read_link {
my $resolved_link = readlink shift;
$resolved_link || bail_out "404 Bad symbolic link",
"Symbolic link cannot be resolved: $!";
print "Content-type: text/plain\n\n";
print $resolved_link, "\n";
exit 0;
}
# List a directory $1
sub list_directory {
( chdir shift && opendir(DIR,".") )
|| bail_out "404 Not found or accessible",
"directory is not found, or unreadable: $!";
print "Content-type: text/plain\n\n";
map {
print "$_/";
print format_stat_info( lstat );
print "\n";
}
readdir(DIR);
closedir DIR;
exit 0
}
# Send a regular file whose path is in $1
sub send_regular_file {
my $file_path = shift;
sysopen(GET_FILE,"$file_path",O_RDONLY)
|| bail_out "404 Not found or accessible",
"$file_path not found, or unreadable";
$| = 1;
binmode GET_FILE;
print "Content-type: application/octet-stream\n\n";
# print "Content-type: text/plain\n\n";
my $chunk_size = 1024; my $chunk; my $read_res;
binmode STDOUT;
while ( $read_res = sysread GET_FILE,$chunk,$chunk_size ) {
syswrite STDOUT,$chunk,$read_res
|| bail_out "STDOUT write error $!";
}
close GET_FILE;
exit 0
}
# Pre-open a file $1 according to modes in $2, as explained in the
# title comments above
# Return to continue GET-ting of the file as usual (that is, checking
# of a modification timestamp and sending the file content)
# Note that the file is already stat(), so we can use _ for all
# file tests.
sub preopen_file {
my $file_path = shift;
my $mode_str = shift;
$mode_str eq 'R---' and return;
my ($mode_r,$mode_w,$mode_c,$mode_t) = unpack("aaaa",$mode_str);
if( not -e _ ) { # if the file does not exist
$mode_c eq '-'
&& bail_out "404 File not found", "File does not exist and O_CREAT not set";
$mode_c eq 'C' || $mode_c eq 'X' || bail_out "Invalid mode_c in $mode_str";
open(FILE,">$file_path") or bail_out "403 Forbidden",
"Failed to create a non-existent file $file_path due to $!";
close FILE;
print "Status: 201 Created\n\n";
exit 0
}
# File does exist
$mode_c eq 'X' && bail_out "403 File already exists",
"File $file_path already exists";
if( $mode_t eq 'T' ) {
open(FILE,">$file_path") or bail_out "403 Forbidden",
"Truncation of $file_path failed due to $!";
close FILE;
print "Status: 201 Truncated\n\n";
exit 0;
}
$mode_t eq '-' || bail_out "Invalid mode_t in $mode_str";
$mode_w eq '-' || -w _ || bail_out "403 Forbidden",
"File $file_path is not writable";
}
# Get a file or directory $1
sub get_file {
my $file_path = shift;
my $sub_command = $ENV{HTTP_PRAGMA};
$sub_command =~ s/httpfs=\"(\S+)\"/$1/;
stat($file_path);
($sub_command =~ /preopen-(\S\S\S\S)/) && preopen_file $file_path,$1;
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks) = stat(_);
$mtime || bail_out "404 Not found or accessible",
"$file_path not found or is unreadable: $!";
print_last_modif $mtime;
my $modif_since = $ENV{HTTP_IF_MODIFIED_SINCE}; # like Tue, 28 Jul 1998 22:31:49 GMT
if( $_ = $modif_since ) {
my ($day,$ascii_mon,$year,$hh,$min,$sec) =
m/\A\w\w\w, (\d\d) (\w\w\w) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT/;
$day || bail_out "wrong date format in $modif_since";
my ($mtime_sec,$mtime_min,$mtime_hour,$mtime_mday,$mtime_mon,
$mtime_year,$mtime_wday,$mtime_yday,$mtime_isdst) =
gmtime($mtime);
my $mon = index "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec",$ascii_mon;
$mon >= 0 || bail_out "wrong month name in $modif_since";
$mon = $mon/4; # $mon is zero based
#my $ts1=sprintf("19%2d%02d%02d%02d%02d%02d",
# $mtime_year,$mtime_mon,$mtime_mday,$mtime_hour,$mtime_min,$mtime_sec);
#my $ts2=($year . sprintf("%02d",$mon) . $day . $hh . $min . $sec);
# bail_out "modif >$ts1< >$ts2<";
if( sprintf("%4d%02d%02d%02d%02d%02d",
($mtime_year+1900),$mtime_mon,$mtime_mday,$mtime_hour,$mtime_min,$mtime_sec)
le ($year . sprintf("%02d",$mon) . $day . $hh . $min . $sec) ) {
print "Status: 304 Not Modified\n\n";
exit 0;
}
}
print "ETag: \""; print format_stat_info(stat(_)); print "\"\n";
$sub_command eq "readlink" && read_link($file_path);
-d _ && list_directory($file_path);
send_regular_file($file_path);
bail_out "404 Not found or accessible",
"$file_path not found, or unreadable: $!";
exit 0;
}
# Main module
my $req_method = $ENV{REQUEST_METHOD}
|| bail_out "REQUEST_METHOD is missing: the script isn't called by a HTTP server";
my $file_path = $ENV{PATH_INFO};
# IIS prepends the name of the script to the PATH_INFO
# so we remove it if we come across it
$file_path =~ s/\A$ENV{SCRIPT_NAME}//;
$file_path
|| bail_out "PATH_INFO is missing: the script isn't called by a HTTP server";
my $Is_IIS = $ENV{SERVER_SOFTWARE} =~ /\AMicrosoft-IIS/;
# URL-unquote any special characters
$file_path =~ tr/+/ /;
$file_path =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$file_path = $file_path =~ /\A\/DeepestRoot/ ? $' :
($Is_IIS ? "C:/InetPub$file_path" : $ENV{PATH_TRANSLATED});
$file_path or ($file_path = "/");
#bail_out "IIS is $Is_IIS";
#bail_out "file $file_path";
#my $env_out=""; while( ($key,$val)=each(%ENV) ) { $env_out .= "$key=$val
" }
#bail_out "env $env_out";
$req_method eq "HEAD" && inquiry_file($file_path);
$req_method eq "PUT" && put_file($file_path);
$req_method eq "GET" && get_file($file_path);
#$req_method eq "DELETE" && delete_file($file_path);
bail_out "405 Method not allowed",
"$req_method method is invalid or not allowed";