#!/usr/local/bin/perl  -w
#
#
#  Program Name:	netatalk2afp.pl
#  Author:			Steve Holmes (based on code borrowed from 
#				William G. Wolber Jr.)
#  e-mail:			sholmes@purdue.edu
#  Office:			YONG 540, Purdue University
#  Department:		ITaP/ICS (Information Technology at Purdue University /
#				Instructional Computing Services)
#  Description:		This program recursively traverses the home
#			directories of users career accounts.  These
#			home directories are located on ICS fileservers,
#			and are named according to the following
#			convention (which relates them to their 
#			associated users):
#
#		/home/${hostname}/${letter}|${letter}${digit}/${username}
#
#			The program searches for any files 
#			that are fingerprinted as AppleDouble metadata
#			files.
#			These files were created by the netatalk afp server
#			software. They are converted to a format and
#			naming/location scheme compatable with the
#			more recent Mac OS X afp server. 
#
#  Glossary:
#
#	@shdw_uid	--	Boolean mask array indexed by uid #'s.  Normally,
#				$shdw_uid[i] is set to 0.  However, when set to 1,
#				$shdw_uid[i] is used as 'mask', to skip over certain
#				exceptional uid's.  A maximum uid of 99999 is
#				assumed.
#
#	@list		--  Placeholder array for /etc/passwd file record
#				entries.  Used to scan /etc/passwd.
#
#	$login_id	--	Scalar placeholder for a user's login id (username).  
#
#	$home_dir	--	Scalar placeholder for the absolute pathname of
#				a user's home directory's 
#
#	%home_dir	--	Associative array of all the absolute home directory
#				pathnames, indexed by the corresponding login id
#				(username).
#

use Math::BigInt;
use Sys::Hostname;

my $file_system = "";
$file_system = shift;

die( "File system must be absolute.") if ("/" ne substr($file_system,0,1));
die( "can't find file_system $file_system") if ( ! -d $file_system );

$logtail=substr($file_system,rindex($file_system,"/") + 1);

$logfile = "/var/tmp/log.$logtail";
open(LOGFILE, ">$logfile") or die( "Cannot open $logfile for writing");

# now the log file (i.e. logdie) can be used.
# 
#  Find the fully qualified DNS domain hostname of the
#  machine we are running on, and chop it down to just the
#  "hostname" part.
#
$fullhostname = `hostname`;
chop( $fullhostname );
@parts = split(/\./, $fullhostname );
$hostname = $parts[0];

$debug = 0;

if ($debug) {
    logit(  "hostname = $hostname\n");
}

select STDOUT;
$| = 1;
system("date");

$count_afpfiles = 0;
 

#################################################################
#								#
#  PART II: Walk the home directory trees (preorder).		#
#								#
#  For each $home_dir, list the files in it.  			#
#								#
#################################################################

select STDOUT;
$| = 1;					

&traverse( $file_system );	# recursive
    
select STDOUT;
$| = 1;

############################
#
#  traverse: usage: traverse(directory)
#            recursively descends a directory structure
#            looking for .AppleDouble directories and
#            converting them to MacOS X AppleDouble files.
#
############################

sub traverse {
    my $dir = $_[0];
    my $pdir;
    my $ppdir;
    my @files;
    my $file;
    my $dforkpath;
    my $ndforkpath;
    my $newfilename;
    my $newadfilepath;
    my $APPLEDOUBLEMAGIC = 0x00051607;
    my $MINADFILESIZE = 10;

#
#  First, read all the filenames in the current working
#  directory, using directory handles.  This way, we
#  need not distinguish between visible and invisible
#  files, we do not have to fork() a shell process for
#  every directory and we don't have to worry about
#  whether an invoked shell has limitations on list
#  sizes.  Besides, it proved necessary.
#

    if ( ! opendir(DIR, $dir ) ) {
	closedir(DIR);
	logit( "Could not open directory $dir for reading!\n");
	return;
    }

    if ( $debug > 1 ) {
	logit( "Traversing directory $dir.\n");
    }

    @files = readdir(DIR);
    closedir(DIR);

#
#  Examine each file.
#

  FILE: foreach $file (@files) {

#
#  Skip over the . and .. directories. 
#

      if ( $file eq "." ) {
	  next;
      }
      if ( $file eq ".." ) {
	  next;
      }

      if ( -l $dir."/".$file ) { # ignore symlinks
	  next;
      }
      elsif ( -f _ ) {
	  if ( $debug > 1 ) {
	      logit( "Inside traverse() $dir/$file is an ordinary file.\n");
	  }

#
#  Check for AppleDouble files. 
#
#    first if we are in a directory named .AppleDouble all the files here are AppleDouble 
#    files

	  $pdir=substr($dir,rindex($dir,"/") + 1); # get the parent directory short name.
	  if ( $pdir eq  ".AppleDouble" ) { # netatalk AppleDouble files are in this directory
	      $adfilepath = $dir."/".$file;
	      $ppdir=substr($dir,0,rindex($dir,"/")); # get the path to the directory above the parent.
	      if ($debug > 1) {
		  logit("Found potential Apple Double File: $adfilepath\n");
	      }

#
# collect some data for later use.
#
	      ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size) = stat($adfilepath);
#
# Can't handle empty or short files. Ignore them.
#
	      if ( $size < $MINADFILESIZE ) {
		  logit("Ignoring small file $adfilepath, size = $size");
		  next FILE;
	      }
#
#  Check for a genuine AppleDouble file.
#
	      my $fh = 'FH';
	      if ( sysopen($fh,$adfilepath,0) ) {
		  my $magic = GetNum($fh,4);
		  if ($magic != $APPLEDOUBLEMAGIC) {
		      if ( $debug > 1 ) {
			  logit("Not Apple Double Format file!: $adfilepath");
		      }
		      close $fh;
		      next FILE;
		  }
		  close $fh;
	      } else {
		  logit("Failed to open $adfilepath, $!");
		  next FILE;
	      }
	      $count_afpfiles++;
		  
	      if ( $file eq ".Parent") { # dealing with the ad file for the parent of the current .AppleDouble dir.
		  $gpdir=substr($ppdir,0,rindex($ppdir,"/")); # get the grandparent of the directory above the parent.
		  $dname=substr($ppdir,rindex($ppdir,"/") +1 ); # just the name of the directory
		  $newfilename = mknewfilenm($dname);
		  $dforkpath = $gpdir."/".$dname; # complete old datafork path
		  $ndforkpath = $gpdir."/".$newfilename; # the new datafork path
		  $newadfilepath = $gpdir."/"."._".$newfilename; # the new resource/metafile path
	      } else {
		  $newfilename = mknewfilenm($file); 
		  $dforkpath = $ppdir."/".$file; # complete old datafork path
		  $ndforkpath = $ppdir."/".$newfilename; # the new datafork path
		  $newadfilepath = $ppdir."/"."._".$newfilename; # the new resource/metafile path
	      }

	      # if a datafork file does not already exist, create one
	      # using the new name for the datafork, otherwise rename
	      # the existing one to the new name.
	      # Note, if the $file is ".Parent" the resulting contents of both 
	      # $dforkpath and $ndforkpath is a directory name, so we don't 
	      # need to check it, and certainly don't want to created it this way.

	      if ($file ne ".Parent" && ! -f $dforkpath && ! -f $ndforkpath ) {
		  if ( $debug >=2 ) {
		      logit("DEBUG: would have created $ndforkpath");
		  } else {
		      if (open(DF,">$ndforkpath")) {
			  close DF;
			  chmod 0600, $ndforkpath;
			  chown $uid, $gid, $ndforkpath;
			  if ($debug) {
			      logit("Created $ndforkpath");
			  }
		      } else {
			  logit("unable to create $ndforkpath");
			  close DF;
		      }
		  }
	      } else {
		  if ($dforkpath ne $ndforkpath) {
		      if ( $debug >=2 ) {
			  logit("DEBUG: would have attempted rename of $dforkpath to $ndforkpath");
		      } else {
			  rename($dforkpath,$ndforkpath) or
			      logit("unable to rename $dforkpath: $!");
			  if ($debug) {
			      logit("Renamed $dforkpath to $ndforkpath");
			  }
		      }
		  }
	      }

	      # Calling the conversion routine with the recommended args
	      # and with the correct new names and paths.

	      @cmd = ("/home/harmony/a/sjh/bin/sun4/appledouble",
		      "-r92","-v2","-s",
		      "$adfilepath","$newadfilepath");
	      if ( $debug > 1) {
		  
		  print LOGFILE "Command: ";
		  for (@cmd) {
		      print LOGFILE $_," ";
		  }
		  print LOGFILE "\n";
	      }
	      if($debug) {
		  logit("Converting $adfilepath");
	      }
	      if( $debug >=2 ) {
		  logit("Would have done the appledouble conversion here.");
	      } else {
		  if ( system(@cmd) == 0) {	# here's where the appledouble conversion is done
		      chmod 0600, $newadfilepath;
		      chown $uid, $gid, $newadfilepath;
		      logit("Changed $adfilepath to $newadfilepath");
		      if (unlink($adfilepath)) {
			  logit("Removed $adfilepath");
		      } else {
			  logit("Unlink of $adfilepath failed: $!");
		      }
		  } else {
		      print STDERR "****appledouble command failed $!";
		      print STDERR ", converting $adfilepath\n";
		      logit( "****appledouble command failed $!, converting $adfilepath");
		      logit("    $adfilepath not removed.");
		  }
	      }
	  }	
      } elsif ( -d _ ) {
	  if ( $debug > 1 ) {
	      logit( "Inside traverse() $dir/$file is a directory.\n");
	  }
	  #
	  # skip lost+found and tomb
	  # 
	  next FILE if ("lost+found" eq $file && $dir eq $file_system);
	  next FILE if ("tomb" eq $file && $dir eq $file_system);
	  
	  &traverse( $dir."/".$file );
      } else {			#  end elsif( -d ...)
	  if ($debug > 1) {
	      logit( "Inside traverse() $dir/$file is an unknown file type.\n");
	  }
      }
  }				#  end foreach() 
}				#  end traverse()

sub mknewfilenm {
    my $fn = shift;
#
#  the new file name is the same as the old file name with the following
#  changes:
#   Change any :2f sequences to :
#  call to roman2utf8 converts MacRoman to UTF8 formatted file names.
#
    $fn =~ s/:2f/:/g;
    return (roman2utf8($fn));
}

#
# functions stolen from Broc Seib.
#

sub GetNum {
    my $fh    = shift;		## the filehandle (string)
    my $bytes = shift;
    my $offset = shift || undef;
    my $buf = GetBuf($fh,$bytes,$offset);
    my $num;
    if ($bytes == 4) {
	$num = unpack('l',$buf);
    } elsif ($bytes == 2) {
	$num = unpack('S',$buf);
    } else {
	$num = undef;
    }
    return $num;
}

sub GetBuf {
    my $fh    = shift;		## the filehandle (string)
    my $bytes = shift;
    my $offset = shift || undef;
    my $buf;
    my $len;
    
    if (defined($offset)) {
	## user wants to lseek to different spot in file
	seek($fh,$offset,0); ## offset from beginning of file.
    }
    my $bytes_requested = $bytes;
    my $buf_index = 0;
    do {
	$len = sysread($fh,$buf,$bytes_requested,$buf_index);
	$bytes_requested -= $len;
	$buf_index += $len;
    } while ($bytes_requested > 0);
    return $buf;
}

sub logit {
    my $msg = shift;
    my $line = scalar localtime() ;
    print LOGFILE "$line $msg\n";
#    print STDOUT "$msg\n" ;
}

sub logdie {
    my $msg = shift;
    logit($msg);
    die "$msg\n[$!]\n";
}

#
# roman2utf8
# ---
# This script takes one argument, a filename, translates it into
# MacOS X Finder-compatible (extended character) UTF8, and
# renames the file.
#
# Be sure the filename you're running it on is MacRoman or
# it's name will get foobar-ed
#
    
#
# YOU'LL PROBABLY WANT TO MODIFY HOW PARAMETER
# GETTING AND LINK/UNLINKING WORKS
#
    
# we can't just use UTF8 character here because the OSX Finder
# is stupid and wants a certain kind of UTF8
    
sub roman2utf8 {
    $fn = shift;
    our %roman_to_utf8 = (
			  128 => 'A'.pack(CC,204,136),
			  129 => 'A'.pack(CC,204,138),
			  130 => 'C'.pack(CC,204,167),
			  131 => 'E'.pack(CC,204,129),
			  132 => 'N'.pack(CC,204,131),
			  133 => 'O'.pack(CC,204,136),
			  134 => 'U'.pack(CC,204,136),
			  135 => 'a'.pack(CC,204,129),
			  136 => 'a'.pack(CC,204,128),
			  137 => 'a'.pack(CC,204,130),
			  138 => 'a'.pack(CC,204,136),
			  139 => 'a'.pack(CC,204,131),
			  140 => 'a'.pack(CC,204,138),
			  141 => 'c'.pack(CC,204,167),
			  142 => 'e'.pack(CC,204,129),
			  143 => 'e'.pack(CC,204,128),
			  144 => 'e'.pack(CC,204,130),
			  145 => 'e'.pack(CC,204,136),
			  146 => 'i'.pack(CC,204,129),
			  147 => 'i'.pack(CC,204,128),
			  148 => 'i'.pack(CC,204,130),
			  149 => 'i'.pack(CC,204,136),
			  150 => 'n'.pack(CC,204,131),
			  151 => 'o'.pack(CC,204,129),
			  152 => 'o'.pack(CC,204,128),
			  153 => 'o'.pack(CC,204,130),
			  154 => 'o'.pack(CC,204,136),
			  155 => 'o'.pack(CC,204,131),
			  156 => 'u'.pack(CC,204,129),
			  157 => 'u'.pack(CC,204,128),
			  158 => 'u'.pack(CC,204,130),
			  159 => 'u'.pack(CC,204,136),
			  160 => pack(CCC,226,128,160),
			  161 => pack(CC,194,176),
			  162 => pack(CC,194,162),
			  163 => pack(CC,194,163),
			  164 => pack(CC,194,167),
			  165 => pack(CCC,226,128,162),
			  166 => pack(CC,194,182),
			  167 => pack(CC,195,159),
			  168 => pack(CC,194,174),
			  169 => pack(CC,194,169),
			  170 => pack(CCC,226,132,162),
			  171 => pack(CC,194,180),
			  172 => pack(CC,194,168),
			  173 => pack(CCC,226,137,160),
			  174 => pack(CC,195,134),
			  175 => pack(CC,195,152),
			  176 => pack(CCC,226,136,158),
			  177 => pack(CC,194,177),
			  178 => pack(CCC,226,137,164),
			  179 => pack(CCC,226,137,165),
			  180 => pack(CC,194,165),
			  181 => pack(CC,194,181),
			  182 => pack(CCC,226,136,130),
			  183 => pack(CCC,226,136,145),
			  184 => pack(CCC,226,136,143),
			  185 => pack(CC,207,128),
			  186 => pack(CCC,226,136,171),
			  187 => pack(CC,194,170),
			  188 => pack(CC,194,186),
			  189 => pack(CC,206,169),
			  190 => pack(CC,195,166),
			  191 => pack(CC,195,184),
			  192 => pack(CC,194,191),
			  193 => pack(CC,194,161),
			  194 => pack(CC,194,172),
			  195 => pack(CCC,226,136,154),
			  196 => pack(CC,198,146),
			  197 => pack(CCC,226,137,136),
			  198 => pack(CCC,226,136,134),
			  199 => pack(CC,194,171),
			  200 => pack(CC,194,187),
			  201 => pack(CCC,226,128,166),
			  202 => pack(CC,194,160),
			  203 => 'A'.pack(CC,204,128),
			  204 => 'A'.pack(CC,204,131),
			  205 => 'O'.pack(CC,204,131),
			  206 => pack(CC,197,146),
			  207 => pack(CC,197,147),
			  208 => pack(CCC,226,128,147),
			  209 => pack(CCC,226,128,148),
			  210 => pack(CCC,226,128,156),
			  211 => pack(CCC,226,128,157),
			  212 => pack(CCC,226,128,152),
			  213 => pack(CCC,226,128,153),
			  214 => pack(CC,195,183),
			  215 => pack(CCC,226,151,138),
			  216 => 'y'.pack(CC,204,136),
			  217 => 'Y'.pack(CC,204,136),
			  218 => pack(CCC,226,129,132),
			  219 => pack(CCC,226,130,172),
			  220 => pack(CCC,226,128,185),
			  221 => pack(CCC,226,128,186),
			  222 => pack(CCC,239,172,129),
			  223 => pack(CCC,239,172,130),
			  224 => pack(CCC,226,128,161),
			  225 => pack(CC,194,183),
			  226 => pack(CCC,226,128,154),
			  227 => pack(CCC,226,128,158),
			  228 => pack(CCC,226,128,176),
			  229 => 'A'.pack(CC,204,130),
			  230 => 'E'.pack(CC,204,130),
			  231 => 'A'.pack(CC,204,129),
			  232 => 'E'.pack(CC,204,136),
			  233 => 'E'.pack(CC,204,128),
			  234 => 'I'.pack(CC,204,129),
			  235 => 'I'.pack(CC,204,130),
			  236 => 'I'.pack(CC,204,136),
			  237 => 'I'.pack(CC,204,128),
			  238 => 'O'.pack(CC,204,129),
			  239 => 'O'.pack(CC,204,130),
			  240 => pack(CCC,239,163,191),
			  241 => 'O'.pack(CC,204,128),
			  242 => 'U'.pack(CC,204,129),
			  243 => 'U'.pack(CC,204,130),
			  244 => 'U'.pack(CC,204,128),
			  245 => pack(CC,196,177),
			  246 => pack(CC,203,134),
			  247 => pack(CC,203,156),
			  248 => pack(CC,194,175),
			  249 => pack(CC,203,152),
			  250 => pack(CC,203,153),
			  251 => pack(CC,203,154),
			  252 => pack(CC,194,184),
			  253 => pack(CC,203,157),
			  254 => pack(CC,203,155),
			  255 => pack(CC,203,135),
			  );
    

## now to actually do something:
    
    @chars = split(/(.)/,$fn);
    $utf8 = '';
    
    
    foreach $i (@chars) {
	if(ord($i)) {
	    $utf8 = $utf8.myConvert(ord($i));
	}
    }
    
    
    return $utf8;

#    if(link $fn,$utf8) {
#	unlink($fn);
#    } else {
#	die "Linking of file failed";
#    }
    
    
    1;
    
}

sub myConvert {
    $num = shift;
    
    if(exists($roman_to_utf8{$num})) {
        return $roman_to_utf8{$num};
    }
    else {
        return chr($num);
    }
}

##
#
# EOF
#
##