#!/usr/bin/perl
#
# VSS-to-Mercurial migration script v1.04 by Andy Duplain (trojanfoe@gmail.com)
#
# Based on the VSS-to-Subversion script by:
# Brett Wooldridge, Daniel Dragnea, Magnus Hyllander and Neil Sleightholm.
# (See: http://neilsleightholm.blogspot.com)
#
# Future updates to this script will be uploaded to:
# http://trojanfoe.org.uk/vss2hg
#
# Typical usage:
#
# C:\> vss2hg.pl --ssrepo=C:\path\to\vssrepo --sshome="C:\Program Files\Microsoft Visual Studio\VSS\win32" $/vssproj1
#
# (specify the full path to both ssrepo and sshome).
#
# This will create a Mercurial repository called 'hgrepo' in the current
# directory - this can then be cloned or copied to its final destination.
#
# Version History
# 1.07  13-Mar-2012 Force replacement of tags with same name.
# 1.06  16-Feb-2012 Enabled escaping of double quotes.
# 1.05  19-Aug-2010 Ignore hg commit error when nothing changed.
#                   Handle commas in file names.
#                   Keep empty lines in comments.
# 1.04  02-Nov-2009 Handle commas, colons, and empty lines in labels.
#                   Ignore regex patterns in file names.
#                   Skip rollbacks.
# 1.03  11-Aug-2009 Allow VSS binaries to be invoked on Linux using Wine.
# 1.02	09-Feb-2009 Don't append '+' to comment within build_atoms().
# 1.01  30-Jan-2009 Fix bug in label comment.
# 1.00	29-Jan-2009 Initial
#

my $VERSION = "1.07";

use strict;
use POSIX;
use File::Path;

# Set $USE_WINE to invoke SS.EXE using Wine
my $USE_WINE = 0;

# Set $US_DATE_FORMAT > 0 if VSS generates dates in US format (MM-DD-YY),
# else 0 if it generates dates in UK format (DD-MM-YY)
my $US_DATE_FORMAT = 1;

# Adjust for Windows/Linux...
my $WINDOWS = 1;
my $FILESEP = '/';
if ($WINDOWS > 0)
{
	$FILESEP = '\\';
}

my $DEBUG = 1;

my $RESUME = 0;
my $RESUMEAFTERATOM = '';
my $MIGRATELATEST = 0;
my $DUMPUSERS = 0;
my $FORCEUSER = '';
my $SSREPO = '';
my $SSPROJ = '';
my $SSHOME = '';
my $SSCMD = '';
my $CUTOFFDATE = 0;

# This is the username and password used for migration operations
my $USERNAME = 'admin';
my $PASSWORD = '';

my $PHASE = 0;

my @directorylist = ();
my @filelist = ();
my @histories = ();
my %atomlist;
my @atoms;

my $datestring;

if ($DEBUG == 1)
{
    open(STDERR, "> vss2hg.log");
}

&parse_args(@ARGV);

&setup();

$datestring = prettydate();
print "Migration started: $datestring\n";
print STDERR "Migration started: $datestring\n";

if ($MIGRATELATEST)
{
    &get_latest_checkpoint();
}
elsif ($RESUME)
{
    &resume();
}

if ($PHASE < 1)
{
    print "Project: $SSPROJ\n";
    &build_directorylist($SSPROJ);
}

if ($PHASE < 2)
{
    &build_filelist();
}

if ($PHASE < 3)
{
    &build_histories();
    &dump_users();
}

if ($DUMPUSERS)
{
    &dump_users_and_exit();
}

&build_atoms;

if ($MIGRATELATEST)
{
    print "\nHistory has now been refreshed. You can compare atoms.txt.1 with atoms.txt to\n";
    print "see if new data to be migrated has been checked in to the VSS repository after\n";
    print "the previous run. Also verify that the last line of extract-progress.txt lists\n";
    print "the last atom that was processed. When satisfied, you can process new atoms\n";
    print "with the --resume option.\n\n";
    exit;
}

if ($PHASE < 5)
{
    &create_directories;
}

if ($PHASE < 6)
{
    ##&checkout_directories;
}

&extract_and_import;

if ($DEBUG)
{
    close(DEBUG);
}

$datestring = prettydate();
print "\nMigration complete: $datestring\n\n";
print STDERR "\nMigration complete: $datestring\n\n";

exit;


##############################################################
# Parse Command-line arguments
#
sub parse_args
{
    my $argc = @ARGV;
    if ($argc < 1)
    {
        print "vss2hg.pl: missing command arguments\n";
        print "Try 'vss2hg.pl --help' for more information\n\n";
        exit -1;
    }

    if ($ARGV[0] eq '--help')
    {
        print "Visual SourceSafe to Mercurial Migration Tool - v$VERSION\n\n";
        print "Usage: vss2hg.pl [options] project\n\n";
        print "Migrate a Visual SourceSafe project to Mercurial.\n\n";
        print "  --resume\t\tresume the migration from last checkpoint\n";
        print "\t\t\tlast checkpoint\n";
        print "  --ssrepo=<dir>\trepository path, e.g. \\\\share\\vss\n";
        print "  --sshome=<dir>\tVSS installation directory\n";
        print "  --force-user=<user>\tforce the files to be checked into Mercurial as\n";
        print "\t\t\tas user <user>\n";
        print "  --cutoff-date=<yyyymmdd>\tminimum date to import\n";
        print "  --dumpusers\t\tafter pre-processing the VSS repository, create a\n";
        print "\t\t\tusers.txt file which can be used to create comparable\n";
        print "\t\t\taccounts in Mercurial.  The migration can be resumed\n";
        print "\t\t\twithout penalty by using the --resume option\n\n";
        exit -1;
    }

    for (my $i = 0; $i < $argc; $i++)
    {
        my $arg = $ARGV[$i];
        if ($arg eq '--resume')
        {
            $RESUME = 1;
        }
        elsif ($arg =~ /--migrate-latest/)
        {
            $MIGRATELATEST = 1;
        }
        elsif ($arg eq '--dumpusers')
        {
            $DUMPUSERS = 1;
        }
        elsif ($arg =~ /--ssrepo=/)
        {
            $SSREPO = $';
        }
        elsif ($arg =~ /--sshome=/)
        {
            $SSHOME = $';
        }
        elsif ($arg =~ /--force-user=/)
        {
            $FORCEUSER = $';
        }
        elsif ($arg =~ /--cutoff-date=/)
        {
            # Convert parameter from yyyymmdd to a datetime
            my $dateparam = $';
            $CUTOFFDATE = POSIX::mktime(0, 0, 0, substr($dateparam, 6, 2), substr($dateparam, 4, 2) - 1, substr($dateparam, 0, 4) - 1900, -1, -1, -1);
        }
		
		$SSPROJ = $arg;
	}

	if ($SSPROJ !~ /^\$\/\w+/ && $SSPROJ ne '$/' )
	{
		print "Error: missing or invalid project specification, must be of the form \$/project or \$/\n\n";
		exit -1;
	}
}


##############################################################
# Check environment and setup globals
#
sub setup
{
    $SSREPO = @ENV{'SSDIR'} unless length($SSREPO) > 0;
    if ($SSREPO eq '' || length($SSREPO) == 0)
    {
        die "Environment variable SSDIR must point to a SourceSafe repository.";
    }
    $SSHOME = @ENV{'SS_HOME'} unless length($SSHOME) > 0;
    if ($SSHOME eq '' || length($SSHOME) == 0)
    {
        die "Environment variable SS_HOME must point to where SS.EXE is located.";
    }

    $ENV{'SSDIR'} = $SSREPO;
    $SSCMD = "$SSHOME";
    if ($SSCMD !~ /^\".*/)
    {
        $SSCMD = "\"$SSCMD\"";
    }
    if ($USE_WINE > 0)
    {
        $SSCMD="wine $SSHOME/SS.EXE";
    }
    else
    {
        $SSCMD =~ s/\"(.*)\"/\"$1\\SS.EXE\"/;
    }

    my $banner = "Visual SourceSafe to Mercurial Migration Tool - v$VERSION\n" .
			     "by Andy Duplain (trojanfoe\@gmail.com)\n" .
				 "Based on the VSS-to-Subversion script by:\n" .
				 "Brett Wooldridge, Daniel Dragnea, Magnus Hyllander and Neil Sleightholm.\n" .
		         "SourceSafe repository: $SSREPO\n" .
                 "SourceSafe directory : $SSHOME\n" .
				 "SourceSafe project   : $SSPROJ\n";

    if (0 == $CUTOFFDATE){
        $banner .= "History cut off      : not set\n\n";
    }else{
        $banner .= "History cut off      : " . POSIX::ctime($CUTOFFDATE) . "\n\n";
    }
    
    print "$banner";
    if ($DEBUG)
    {
        print STDERR "$banner";
    }
}


##############################################################
# Build project directory hierarchy
#
sub build_directorylist
{
    my($proj) = @_;

    if ($DEBUG)
    {
        print STDERR "\n#############################################################\n";
        print STDERR "#              Subroutine: build_directorylist              #\n";
        print STDERR "#############################################################\n";
    }

    print "Building directory hierarchy...";

    my $oldcount = @directorylist;

    recursive_build_directorylist($proj);

    sort(@directorylist);
    open(DIRS, "> directories.txt");
    foreach my $dir (@directorylist)
    {
        print DIRS "$dir\n";
    }
    close(DIRS);

    my $count = @directorylist - $oldcount;
    print "\b\b\b:\tdone ($count dirs)\n";

    $PHASE = 1;
}

sub recursive_build_directorylist
{
    my ($proj) = @_;
    push @directorylist, $proj;

    my $cmd = $SSCMD . " Dir \"$proj\" -I- -F- -R-";
    $_ = `$cmd`;
    if ($DEBUG) {
        print STDERR "\nDirectory listing of $proj:\n$_";
    }
    my @lines = split('\n');
    foreach my $line (@lines)
    {
        chomp($line);
        if ($line =~ /^\$([^\/][^:]+)$/) {
            recursive_build_directorylist("$proj/$1");
        }
    }
}


##############################################################
# Build a list of files from the list of directories
#
sub build_filelist
{
    if ($DEBUG)
    {
        print STDERR "\n#############################################################\n";
        print STDERR "#                Subroutine: build_filelist                 #\n";
        print STDERR "#############################################################\n";
    }

    my ($proj, $cmd, $i, $j, $count);

    print "Building file list (  0%):            ";

    $count = @directorylist;

    $i = 0;
    $j = 0.0;
    foreach $proj (@directorylist)
    {
        ###$* = 1;
        $/ = ':';

        $cmd = $SSCMD . " Dir -I- \"$proj\"";
        $_ = `$cmd`;

        # what this next expression does is to merge wrapped lines like:
        #    $/DeviceAuthority/src/com/eclyptic/networkdevicedomain/deviceinterrogator/excep
        #    tion:
        # into:
        #    $/DeviceAuthority/src/com/eclyptic/networkdevicedomain/deviceinterrogator/exception:
        s/\n((\w*\-*\.*\w*\/*)+\:)/$1/g;

        ###$* = 0;
        $/ = '';
        my @lines = split('\n');
        LOOP: foreach my $line (@lines)
        {
            last LOOP if ($line eq '' || length($line) == 0);

            if ($line !~ /(.*)\:/ && $line !~ /^\$.*/ && $line !~ /^([0-9]+) item.*/ && $line !~ /^No items found.*/)
            {
                # Pinned files are returned as "file;n" remove ";n"
                my @file = split(/;/,$line);
                # Exclude vss files e.g. files ending .vsscc, .vssscc, .vspscc etc
                if (@file[0] =~ /.*\.\w{2,3}scc$/) 
                {
                    print STDERR "Skipping VSS file: $proj/@file[0]\n";
                }
                else
                {
                    push(@filelist, "$proj/@file[0]");
                    printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b(%3d\%):      %-6d", (($j / $count) * 100), $i);
                    if ($DEBUG)
                    {
                        print STDERR "$proj/@file[0]\n";
                    }
                    $i++;
                }
            }
        }
        $j++;
    }

    open(FILES,">files.txt");
    for my $file (@filelist)
    {
        $file =~ s/,/__COMMA__/g;
        print FILES "$file\n";
    }
    close(FILES);

    printf "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b:             done ($i files)\n";

    $PHASE = 2;
}


##############################################################
# Build complete histories for all of the files in the project
#
sub build_histories
{
    if ($DEBUG)
    {
        print STDERR "\n#############################################################\n";
        print STDERR "#                Subroutine: build_histories                #\n";
        print STDERR "#############################################################\n";
    }

    my ($file, $pad, $padding, $oldname, $shortname, $diff);
    my ($i, $count, $versioncount, $tmpname, $cmd);
	my $hist;

    print "Building file histories (  0%): ";

    $count = @filelist;
    $i = 0.0;
    $diff = 0;
    $pad = "                                                     ";
    $oldname = '';
    $shortname = '';
    foreach $file (@filelist)
    {
        $file =~ s/__COMMA__/,/g;

        # display sugar
        $oldname =~ s/./\b/g;
        $shortname = substr($file, rindex($file,'/') + 1);
        $diff = length($oldname) - length($shortname);
        $padding = ($diff > 0) ? substr($pad, 0, $diff) : '';
        print "$oldname";
        $tmpname = substr("$shortname$padding", 0, 45);
        printf("\b\b\b\b\b\b\b\b(%3d\%): %s", (($i / $count) * 100), $tmpname);
        $padding =~ s/./\b/g;
        print "$padding";
        $oldname = substr($shortname, 0 , 45);

        # real work
        $cmd = $SSCMD . " History -I- \"$file\"";
        $_ = `$cmd`;

        #print STDERR "$_"; # DEBUG ONLY
        &proc_history($file, $_, 0);

        $i++;
    }

	# Get the labels for the top-level project only, but use proc_history() to process
	# the output...
	$cmd = $SSCMD . " History -L -F- \"$SSPROJ\"";
	$_ = `$cmd`;
	&proc_history($SSPROJ, $_, 1);
	
	# If one of the user's had the clock set wrong on their PC then this
        # error will be generated later:
        #
	# ERROR: Files would be checked in in an unexpected order
        #
	# Fix this by adjusting the date to just before the last change (the
        # real date has been lost anyway so this will not cause any more damage
        # to the histories).
	my $last_file = "";
	my $last_timestamp = 0;
	foreach $hist (@histories)
	{
        my ($file, $version, $datetime, $timestamp, $user, $action, $comment) = split(',', $hist, 7);
		if ($file eq $last_file && $timestamp > $last_timestamp)
		{
			$timestamp = $last_timestamp - 1;
			$hist = join(',', $file, $version, "1990-01-01 00:00", $timestamp, $user, $action, $comment);
			print STDERR "Replaced date on $file v$version as it was out-of-order\n";
		}
		else
		{
			$last_file = $file;
			$last_timestamp = $timestamp;
		}
    }
		
    open(HIST, ">histories.txt");
    foreach $hist (@histories)
    {
        print HIST "$hist\n";
    }
    close(HIST);

    $oldname =~ s/./\b/g;
    $count = @histories;
    print "$oldname\b\b\b\b\b\b\b\b\b:        done ($count versions)" . substr($pad, 0, 20) . "\n";

    $PHASE = 3;
}

##############################################################
# Process the VSS history of a file.
#
sub proc_history
{
    my $file = shift(@_);
    my $hist = shift(@_);
	my $support_labels = shift(@_);

    $hist =~ s/Checked in\n/Checked in /g;

    #print "Starting processing of history file\n";

    use constant STATE_FILE    => 0;
    use constant STATE_VERSION => 1;
    use constant STATE_USER    => 2;
    use constant STATE_ACTION  => 3;
    use constant STATE_COMMENT => 4;
    use constant STATE_FINAL   => 5;

    my $state = STATE_VERSION;

    my $projre = '\$\/';

    my ($version, $junk, $user, $date, $time, $month, $day, $year);
    my ($hour, $minute, $path, $action, $datetime, $timestamp);

    my $readhist = 0;
    my $comment = '';
	my $label = '';
    my @lines = split('\n', $hist);
    my $line_count = @lines;
    my $i = 0;
    my $history_count = 0;

    #print STDERR ">>>> $file\n"; # DEBUG ONLY
    foreach my $line (@lines)
    {
        #print STDERR ">>>> state = $state: $line\n"; # DEBUG ONLY
        if ($state == STATE_VERSION && $line =~ /^\*+  Version ([0-9]+)/)
        {
            $version = $1;
            $readhist = 1;
            $state = STATE_USER;
        }
		elsif ($support_labels && $state == STATE_USER && $line =~ /^Label: "(.*)"/)
		{
			$label = $1;
			# Translate commas to dots
			$label =~ tr/,/./;
			# State is still STATE_USER
		}
        elsif ($state == STATE_USER && $line =~ /^User: /)
        {
            # Example: "User: Neil Sleightholm     Date:  9/03/01   Time:  8:15"
            $line =~ m/\w+:\s+(.*?)\s+\w+:\s+(.*?)\s+\w+:(.*)/;
            $user = $1;
            $date = $2;
            $time = $3;

            #($year,$month,$day) = split('-', $date); # yyyy-mm-dd date format
            if ($US_DATE_FORMAT > 0) {
                ($month,$day,$year) = split('/', $date); # US date format
            } else {
                ($day,$month,$year) = split('/', $date); # UK date format
            }
            ($hour,$minute) = split(':', $time);
            $year = ($year < 80) ? 2000 + $year : 1900 + $year;
            $datetime = sprintf("%04d-%02d-%02d %02d:%02d",$year,$month,$day,$hour,$minute);
            $timestamp = POSIX::mktime(0, $minute, $hour, $day, $month - 1, $year - 1900, -1, -1, -1);
            if (!defined($timestamp)) {
                print STDERR "$file:\n";
                print STDERR "$line => $year-$month-$day $hour:$minute => $timestamp\n";
                print "\$timestamp is undef!!!\n";
                exit;
            }
            $state = STATE_ACTION;
        }
        elsif ($state == STATE_ACTION)
        {
            if ($line =~ /^Checked in /)
            {
                if ($' =~ /^$projre/)
                {
                    $path = $';
                    $action = 'checkedin';
                    $state = STATE_COMMENT;
                }
                else
                {
                    $projre = $';
                    $projre =~ s/([\$\/\(\)])/\\$1/g;
                    $action = 'checkedin';
                    $state = STATE_COMMENT;
                }
            }
            elsif ($line =~ /^Created/)
            {
                $action = 'created';
                $state = STATE_COMMENT;
            }
            elsif ($line =~ /^Branched/)
            {
                $action = 'branched';
                $state = STATE_COMMENT;
            }
            elsif ($line =~ /^Rolled back/)
            {
                $action = 'rolledback';
                $state = STATE_COMMENT;
            }
            elsif ($support_labels && $line =~ /^Labeled/)
            {
                $action = "labeled $label";
                $state = STATE_COMMENT;
            }
            elsif ($line =~ / added/)
            {
                $path = $`;
                $action = 'added';
                $state = STATE_COMMENT;
            }
            elsif ($line =~ / deleted/)
            {
                $path = $`;
                $action = 'deleted';
                $state = STATE_COMMENT;
            }
        }
        elsif ($state == STATE_COMMENT)
        {
		    # Don't abort comment on an empty line
            if ($line =~ /^\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*/)
            {
                $state = STATE_FINAL;
            }
            elsif ($line =~ /^Comment\:/ || ($support_labels && $line =~ /^Label comment\:/))
            {
                $comment = trim($');
            }
            elsif (length($comment) > 0)
            {
                $comment = $comment . '__NL__' . trim($line);
            }
        }

        $i++;
        if ($state == STATE_FINAL || $readhist && $i == $line_count)
        {
            # Ignore history before cuttoff unless no history has been found
            if (0 != $CUTOFFDATE && $history_count > 0 && $timestamp < $CUTOFFDATE)
            {
                print STDERR "History too old: $history_count, version: $version - " . POSIX::ctime($timestamp); # DEBUG ONLY
                last;
            }

            $file =~ s/,/__COMMA__/g;

            $hist = join(',', $file, $version, $datetime, $timestamp, $user, $action, $comment);
            $comment = '';
            if ($DEBUG)
            {
                print STDERR "$hist\n";
            }
            push(@histories, $hist);
            $readhist = 0;
            $state = STATE_VERSION;

            # Don't abort comment on an empty line
            if ($line =~ /^\*+  Version ([0-9]+)/)
            {
                $version = $1;
                $readhist = 1;
                $state = STATE_USER;
            }

            # Only 'created', 'checkedin' count as history
            if ($action eq 'checkedin' || $action eq 'created')
            {
                $history_count++;
                # Ignore history before cuttoff
                if (0 != $CUTOFFDATE && $timestamp < $CUTOFFDATE)
                {
                    print STDERR "History too old: $history_count, version: $version - " . POSIX::ctime($timestamp); # DEBUG ONLY
                    last;
                }
            }
        }
    }
}


##############################################################
# Remove white space from the beginning and end of a string
#
sub trim
{
    my ($a) = @_;
    $a =~ s/^\s+//;    # remove whitespace at beginning
    $a =~ s/\s+$//;    # remove whitespace at end
    #$a =~ s/\s\s+/ /g; # replace multiple whitespace by a single space
    return $a;
}


##############################################################
# Dump the users from the repository into users.txt
#
sub dump_users
{
    my %USERHASH = ();
    my $count = 0;
    
    print "Building user list:";

    foreach my $hist (@histories)
    {
        my ($file, $version, $datetime, $timestamp, $user, $action, $comment) = split(',', $hist, 7);
        $USERHASH{$user} = 1;
    }

    open(USERS, "> users.txt");
    foreach my $user (keys %USERHASH)
    {
        print USERS "$user\n";
        $count++;
    }
    close(USERS);

    print "\t\tdone ($count users)\n";
}


##############################################################
# Dump the users from the repository into users.txt and exit
#
sub dump_users_and_exit
{
    &dump_users();
    
    print "\nUsers.txt file has been created.  Use the list of users in this\n";
    print "file to create matching user accounts in Subversion.  Ensure that these\n";
    print "accounts initially have NO AUTHENTICATION, otherwise the migration will\n";
    print "likely fail.  Alternatively, you can use the --force-user option to\n";
    print "create all files with the same username.  Either way, you can resume\n";
    print "this migration, picking up from this point, by using the --resume\n";
    print "option on the command line.\n\n";
    
    exit 0;
}


##############################################################
# Group files together that can be commited as an atomic unit,
# i.e. were checked in at the same time by the same user, and
# with the same comment.
#
sub build_atoms
{
    if ($DEBUG) {
        print STDERR "\n#############################################################\n";
        print STDERR "#                  Subroutine: build_atoms                  #\n";
        print STDERR "#############################################################\n";
    }

    print "Building atoms:   0%";

    %atomlist = ();

    my @userhist = sort sort_hist_by_user_timestamp @histories;

    my ($prevtime,$prevuser,$prevcomment) = (0,'','');
    my ($atom_user,$atom_datetime,$atom_timestamp,$atom_comment) = ('','',0,'');
    my $histcount = @userhist;
    my $i = 0;
    my $atom_files = {};
    
    foreach my $hist (@userhist)
    {
        # display sugar
        $i++;
        printf("\b\b\b\b%3d\%", (($i / $histcount) * 100));

        # real work
        my ($file,$version,$datetime,$timestamp,$user,$action,$comment) = split(/,/,$hist,7);

        # ignore actions which are not really new versions of the file
        next unless ($action eq 'checkedin' || $action eq 'created' || $action eq 'branched' || $action =~ '^labeled ');

        if ($user ne $prevuser || $comment ne $prevcomment || $timestamp - $prevtime >= 120 || exists $$atom_files{$file})
        {
            if ($prevtime != 0) 
            {
                #print STDERR "New atom ($prevuser/$user, $prevcomment/$comment, " . ($timestamp - $prevtime) . ")\n"; # DEBUG ONLY
                my $newatom = join(',',$atom_user,$atom_datetime,$atom_timestamp,$prevtime,$atom_comment);
                while (exists $atomlist{$newatom}) {
                    $newatom .= "+";
                }
                $atomlist{$newatom} = $atom_files;
                #print STDERR "$newatom\n"; # DEBUG ONLY
                #for my $f (values %$atom_files) {
                #    print STDERR "  $f\n";
                #}
            }
            $atom_files = {};
        }
        if (scalar %$atom_files == 0) 
        {
            $atom_user = $user;
            $atom_timestamp = $timestamp;
            $atom_datetime = $datetime;
            $atom_comment = $comment;
            if (length($atom_comment) == 0) {
				$atom_comment = "No comment provided";
            }
        }
        $$atom_files{$file} = join(',',$file,$version,$action);
        $prevtime = $timestamp;
        $prevuser = $user;
        $prevcomment = $comment;
    }
    my $newatom = join(',',$atom_user,$atom_datetime,$atom_timestamp,$prevtime,$atom_comment);
    #while (exists $atomlist{$newatom}) {
    #    $newatom .= "+";
    #}
    $atomlist{$newatom} = $atom_files;

    # check for conflicting atoms
    @atoms = sort sort_atoms_by_timestamp (keys %atomlist);
    
    my %fileversions = ();
    my $error = 0;
    $i = 0;
    while ($i < $#atoms) 
    {
        my ($atoma,$atomb) = ($atoms[$i],$atoms[$i+1]);
        my ($usera,$datetimea,$timestamp1a,$timestampna,$commenta) = split(/,/,$atoma,5);
        my ($userb,$datetimeb,$timestamp1b,$timestampnb,$commentb) = split(/,/,$atomb,5);
        # check if atomb overlaps atoma in time
        if ($timestamp1a <= $timestamp1b && $timestamp1b <= $timestampna) 
        {
            my $reversed = 0;
            # check if the atoms are updating the same file in the wrong order
          CHECK:
            for my $filea (values %{$atomlist{$atoma}}) 
            {
                my ($fna,$vera,$resta) = split(/,/,$filea,3);
                for my $fileb (values %{$atomlist{$atomb}}) 
                {
                    my ($fnb,$verb,$restb) = split(/,/,$fileb,3);
                    if ($fna eq $fnb && $vera > $verb) 
                    {
                        if ($reversed) 
                        {
                            print STDERR "ERROR: Conflicting atoms, reversing order didn't help:\n$atoma:\n  $filea\n$atomb:\n  $fileb\n";
                            print "ERROR: Conflicting atoms\n";
                            $error = 1;
                            goto DUMP;
                        }
                        else 
                        {
                            # Two atoms where checked in at the same time
                            print STDERR "Conflicting atoms, trying to reverse order:\n$atoma:\n  $filea\n$atomb:\n  $fileb\n";
                            ($atoms[$i],$atoms[$i+1]) = ($atomb,$atoma);
                            ($atoma,$atomb) = ($atoms[$i],$atoms[$i+1]);
                            $reversed = 1;
                            goto CHECK;
                        }
                    }
                }
            }
            if ($reversed) {
                print STDERR "Conflict resolved!\n";
            }
        }

        for my $filea (values %{$atomlist{$atoma}}) 
        {
           my ($fna,$vera,$resta) = split(/,/,$filea,3);
           if (exists $fileversions{$fna}) 
           {
                if ($fileversions{$fna} >= $vera) 
                {
                    print STDERR "ERROR: Files would be checked in in an unexpected order:\nAtom $i,$atoma\n File: $fna\n  cur: $fileversions{$fna}\n  new: $vera\n";
                    print "ERROR: Files would be checked in in an unexpected order\n";
                    $error = 1;
                    goto DUMP;
                }
            }
            $fileversions{$fna} = $vera;
        }

        $i++;
    }
    if ($DEBUG) {
        print STDERR "Atom and file order verified correctly.\n";
    }

  DUMP:
    open(ATOMLIST,">atoms.txt");
    for ($i = 0; $i <= $#atoms; $i++) 
    {
        print ATOMLIST "$i,$atoms[$i]\n";
        for my $file (values %{$atomlist{$atoms[$i]}}) {
            print ATOMLIST "  $file\n";
        }
    }
    close(ATOMLIST);
    if ($error) {
        exit;
    }

    printf("\b\b\b\b\t\tdone (%d atoms)\n", $#atoms + 1);
}


#######################################################################
# Sort the history by user and timestamp.
#
sub sort_hist_by_user_timestamp
{
    my ($patha,$versiona,$datetimea,$timestampa,$usera,$actiona,$commenta) = split(/,/,$a,7);
    my ($pathb,$versionb,$datetimeb,$timestampb,$userb,$actionb,$commentb) = split(/,/,$b,7);
    
    if ($usera ne $userb) {
        return $usera cmp $userb;
    }
    elsif ($timestampa != $timestampb) {
        return $timestampa <=> $timestampb;
    }
    elsif ($commenta ne $commentb) {
        return $commenta cmp $commentb;
    }
    elsif ($patha ne $pathb) {
        return $patha cmp $pathb;
    }
    return $versiona <=> $versionb;
}


#######################################################################
# Sort the atoms by timestamp(s). Sub sort by user and comment.
#
sub sort_atoms_by_timestamp
{
    my ($usera,$datetimea,$timestamp1a,$timestampna,$commenta) = split(/,/,$a,5);
    my ($userb,$datetimeb,$timestamp1b,$timestampnb,$commentb) = split(/,/,$b,5);
    
    if ($timestamp1a != $timestamp1b) {
        return $timestamp1a <=> $timestamp1b;
    }
    elsif ($timestampna != $timestampnb) {
        return $timestampna <=> $timestampnb;
    }
    elsif ($usera ne $userb) {
        return $usera cmp $userb;
    }
    return $commenta cmp $commentb;
}


#######################################################################
# Get the latest checkpoint so allow resuming after refreshing history.
#
sub get_latest_checkpoint
{
    my ($line);
    my $i = 0;

    backup("directories.txt",10);
    backup("files.txt",10);
    backup("histories.txt",10);
    backup("atoms.txt",10);

    if (-f "extract_progress.txt")
    {
        my $lastatom = '';
        print "Calculating extract progress:";
        open(EXTRACT, "< extract_progress.txt");
        while (<EXTRACT>)
        {
            chop($_);
            $lastatom = $_;
        }
        close(EXTRACT);

        $RESUMEAFTERATOM = $lastatom;
        if ($DEBUG)
        {
            print STDERR "Resume after atom: $RESUMEAFTERATOM\n";
        }
        print "\tresume after atom $RESUMEAFTERATOM\n";
    }
}

sub backup
{
    my ($fn,$maxbups) = @_;

    my $lastfn = $fn . "." . $maxbups;
    if (-f $lastfn) 
    {
        unlink($lastfn);
    }
    for (my $i=$maxbups-1; $i>=1; $i--) 
    {
        my $file = $fn . "." . $i;
        my $pfile = $fn . "." . ($i+1);
        if (-f $file) 
        {
            link($file,$pfile);
            unlink($file);
        }
    }
    if (-f $fn) 
    {
        link($fn,$fn . ".1");
        unlink($fn);
    }
}

##############################################################
# Resume from previously generated parsed project data
#
sub resume
{
    my ($line);
    my $i = 0;

    if (-f "directories.txt")
    {
        print "Loading directories:      ";

        $i = 0;
        open(DIRS, "< directories.txt");
        while (<DIRS>)
        {
            $line = $_;
            chop($line);
            push(@directorylist, $line);
            $i++;
            printf("\b\b\b\b\b%5d", $i);
        }
        close(DIRS);
        print "\b\b\b\b\b\t\tdone ($i dirs)\n";
        $PHASE = 1;
    }

    if (-f "files.txt")
    {
        print "Loading files:       ";

        $i = 0;
        open(FILES, "< files.txt");
        while (<FILES>)
        {
            $line = $_;
            chop($line);
            push(@filelist, $line);
            $i++;
            printf("\b\b\b\b\b\b%6d", $i);
        }
        close(FILES);
        print "\b\b\b\b\b\b\t\t\tdone ($i files)\n";
        $PHASE = 2;
    }

    if (-f "histories.txt")
    {
        print "Loading file histories:       ";
        $i = 0;
        open(HIST, "< histories.txt");
        while (<HIST>)
        {
            $line = $_;
            chop($line);
            push(@histories, $line);
            $i++;
            printf("\b\b\b\b\b\b%6d", $i);
        }
        close(HIST);
        print "\b\b\b\b\b\b\tdone ($i versions)\n";
        $PHASE = 3;
    }

    if (-f "extract_progress.txt")
    {
        my $lastatom = '';
        print "Calculating extract progress:";
        open(EXTRACT, "< extract_progress.txt");
        while (<EXTRACT>)
        {
            chop($_);
            $lastatom = $_;
        }
        close(EXTRACT);

        $RESUMEAFTERATOM = $lastatom;
        if ($DEBUG)
        {
            print STDERR "Resume after atom: $RESUMEAFTERATOM\n";
        }
        print "\tresume after atom $RESUMEAFTERATOM\n";
        $PHASE = 6;
    }
}


##############################################################
# Create the directory hierarchy in the local filesystem
#
sub create_directories
{
    if ($DEBUG)
    {
        print STDERR "\n#############################################################\n";
        print STDERR "#              Subroutine: create_directories               #\n";
        print STDERR "#############################################################\n";
    }

    print "Creating local directories: ";
    &recursive_delete('hgrepo');

    my $cmd = "hg init hgrepo";
    if ($DEBUG)
    {
        print STDERR "$cmd\n";
    }

    `$cmd`;
    if ($? != 0) {
        print STDERR "FAILED: $cmd => " . $? >> 8 . "\n";
        exit;
    }
    print "\tdone\n";
}


##############################################################
# Delete a directory tree and all of its files recursively
#
sub recursive_delete
{
    my ($parent) = @_;
    my (@dirs, $dir);

    opendir(DIR, $parent);
    @dirs = readdir(DIR);
    closedir(DIR);
    foreach $dir (@dirs)
    {
        if ($dir ne '.' && $dir ne '..')
        {
            recursive_delete("$parent/$dir");
        }
    }

    if (-d $parent)
    {
        rmdir($parent);
    }
    elsif (-f $parent)
    {
        unlink($parent);
    }
}


##############################################################
# Make a directory tree and all of its sub dirs recursively
#
sub recursive_mkdir
{
    my($tpath) = @_;
    my($dir, $accum);

    foreach $dir (split(/\//, $tpath))
    {
        $accum = "$accum$dir/";
        if ($dir ne "")
        {
            if (! -d "$accum")
            {
                mkdir $accum;
            }
        }
    }
}


##############################################################
# Checkout a copy of the directory hierarchy so that we have
# a Subversion local working copy
#
sub checkout_directories
{
    if ($DEBUG)
    {
        print STDERR "\n#############################################################\n";
        print STDERR "#             Subroutine: checkout_directories              #\n";
        print STDERR "#############################################################\n";
    }

    print "Checking out directories: ";

    my $cmd = "svn checkout --username \"$USERNAME\"";
	if (length($PASSWORD) > 0)
	{
		$cmd = $cmd . " --password \"$PASSWORD\"";
	}
	$cmd = $cmd . " --non-interactive hgrepo";
    if ($DEBUG)
    {
        print STDERR "$cmd\n";
    }

#    &recursive_delete('./work');
#    mkdir('./work');
    `$cmd`;
    if ($? != 0) {
        print STDERR "FAILED: $cmd => " . $? >> 8 . "\n";
        exit;
    }
    print "\tdone\n";

    $PHASE = 6;
}


##############################################################
# This is the meat.  Extract each version of each file in the
# project from VSS and check it into Mercurial
#
sub extract_and_import
{
    if ($DEBUG)
    {
        print STDERR "\n#############################################################\n";
        print STDERR "#              Subroutine: extract_and_import               #\n";
        print STDERR "#############################################################\n";
    }

    my $padding = "                                                                      ";
    my ($cmd, $tmpname, $localpath, $localdir, $out);
    my ($pyear,$pmon,$pmday,$phour,$pmin,$num) = (0,0,0,0,0,0);

    my $count = @atoms;
    my $startatom = 0;

	my $projpat = $SSPROJ;
	$projpat =~ s/\//\/\//g;
	$projpat =~ s/\$\//\^\\\$/g;

    print "Extracting and creating:\n";

    open(EXTRACT, ">>extract_progress.txt");

    if ($RESUMEAFTERATOM ne '')
    {
        my ($atomnr,$atom) = split(/,/,$RESUMEAFTERATOM,2);
        if ($atoms[$atomnr] eq $atom) {
            $startatom = $atomnr + 1;
        }
        else {
            print STDERR "ERROR! Resume inconsistency: atom $atomnr has changed:\nexp: $atom\ncur: $atoms[$atomnr]\n";
            print "ERROR! Resume inconsistency!\n";
            exit;
        }
    }

    chdir('hgrepo');
    for (my $i = $startatom; $i <= $#atoms; $i++)
    {
        my $atom = $atoms[$i];
        my $targets = '';

        foreach my $atomfile (values %{$atomlist{$atom}})
        {
            my ($file,$version,$action) = split(',',$atomfile,3);

			if ($action =~ /^labeled /)
			{
				# Nothing to extract - just do a 'hg tag'
				$action = $';			
				my ($user,$datetime,$timestamp1,$timestampn,$comment) = split(/,/,$atom,5);

				# Substitude colons with nothing
				$action =~ s/://g;

				# display sugar
				$tmpname = "Creating Tag $action [$user $datetime]";
				printf("\r$padding\r  (%3d\%): %s", ((($i+1) / $count) * 100), substr("$tmpname", 0, 60));
				
				if ($DEBUG) 
				{
					print STDERR "$tmpname\n";
				}
				if ($FORCEUSER ne '')
				{
					$user = $FORCEUSER;
				}
		
				# Translate character codes from CP437/CP850 to UTF-8 (åäöÅÄÖ)
				#$comment =~ tr/\206\204\224\217\216\231/\254\253\271\197\196\214/;
				#$comment =~ tr/\206\204\224\217\216\231/aaoAAO/;
				#$comment =~ s/</[lt]/g;
				#$comment =~ s/>/[gt]/g;
				$comment =~ s/"/\\"/g; # quote quotes
				$comment =~ s/__NL__/ /g;
		
				# commit changes as the VSS user (with a blank password)
				$cmd = "hg tag -f --user \"$user\" --noninteractive --date \"$datetime\" --message \"$comment\" \"$action\" 2>&1";
				$out = `$cmd`;
				if ($DEBUG)
				{
					print STDERR "$cmd\n";
					print STDERR "$out";
				}
				if ($? != 0) 
				{
					print STDERR "FAILED: $cmd => " . $? >> 8 . "\n";
					exit;
				}
				next;
			}

			$file =~ s/__COMMA__/,/g;
			
            # display sugar
            $tmpname = substr($file, rindex($file,'/') + 1, 50) . ' (v.' . $version . ')';
            printf("\r$padding\r  (%3d\%): %s", ((($i+1) / $count) * 100), substr("$tmpname", 0, 60));

			# extract to the proper directory (less the subversion project name)
			$localpath = substr($file, length($SSPROJ) + 1);
			if ($WINDOWS > 0)
			{
				$localpath =~ s/\//\\/g;
			}
			$file =~ /^\$\//;

			$localdir = $localpath;
			my $slash = rindex($localdir, $FILESEP);
			if ($slash == -1) {
				$localdir = '.';
			}
			else {
				$localdir = substr($localdir,0,$slash);
			}

			if (! -d $localdir)
			{
				mkpath($localdir);
			}

			#print STDERR "file = $file\n"; # DEBUG ONLY
			#print STDERR "localpath = $localpath\n"; # DEBUG ONLY
			#print STDERR "localdir = $localdir\n"; # DEBUG ONLY

			my $fileexists = -f $localpath;
			
			$cmd = $SSCMD . " Get -GTM -W -I-Y -GL\"$localdir\" -V$version \"$file\" 2>&1";
			$out = `$cmd`;

			# get rid of stupid VSS warning messages
			###$* = 1;
			$out =~ s/\n?Project.*rebuilt\.//g;
			$out =~ s/\n?File.*rebuilt\.//g;
			$out =~ s/\n.*was moved out of this project.*rebuilt\.//g;
			$out =~ s/\nContinue anyway.*Y//g;
			###$* = 0;

			if ($DEBUG) 
			{
				print STDERR "$cmd\n";
				print STDERR "$out";
			}
			
			if ($? != 0) 
			{
				print STDERR "FAILED: $cmd => " . $? >> 8 . "\n";
				exit;
			}
			
			if ($out =~ /does not retain old versions of itself/) 
			{
				print STDERR "WARNING: Binary file without history: $file\n";
			}
			elsif (! -f $localpath) 
			{
				print STDERR "ERROR: File not checked out: $file (v.$version)\n";
			}
			else 
			{
				# create list of targets to commit in this atom
				$targets .= "$localpath\n";

				if (! $fileexists ) 
				{
					$cmd = "hg add \"path:$localpath\" 2>&1";
					$out = `$cmd`;
					if ($DEBUG) 
					{
						print STDERR "$cmd\n";
						print STDERR "$out";
					}
					if ($? != 0) 
					{
						print STDERR "FAILED: $cmd => " . $? >> 8 . "\n";
						exit;
					}
				}
			}
		}
    
        if ($targets ne '')
        {
            my ($user,$datetime,$timestamp1,$timestampn,$comment) = split(/,/,$atom,5);
    
            # display sugar
            $tmpname = "Commit atom $i [$user $datetime]";
            printf("\r$padding\r  (%3d\%): %s", ((($i+1) / $count) * 100), substr("$tmpname", 0, 60));
            
            if ($DEBUG) 
            {
                print STDERR "$tmpname\n";
            }
            if ($FORCEUSER ne '')
            {
                $user = $FORCEUSER;
            }
    
            # Translate character codes from CP437/CP850 to UTF-8 (åäöÅÄÖ)
            #$comment =~ tr/\206\204\224\217\216\231/\254\253\271\197\196\214/;
            #$comment =~ tr/\206\204\224\217\216\231/aaoAAO/;
            #$comment =~ s/</[lt]/g;
            #$comment =~ s/>/[gt]/g;
            $comment =~ s/"/\"/g; # quote quotes
            $comment =~ s/__NL__/\n/g;
    
            open(MESSAGE,">___message");
            print MESSAGE "$comment\n";
            close(MESSAGE);
            
            # commit changes as the VSS user (with a blank password)
            $cmd = "hg commit --user \"$user\" --noninteractive --date \"$datetime\" --logfile ___message 2>&1";
            $out = `$cmd`;
            if ($DEBUG)
            {
                print STDERR "$cmd\n";
                print STDERR "$out";
            }
            if (($? >> 8) > 1) 
            {
                print STDERR "FAILED: $cmd => " . $? >> 8 . "\n";
                exit;
            }
    
            # Clean up
            unlink("___message");
        }
            
        print EXTRACT "$i,$atom\n";
    }
    close(EXTRACT);
    printf("\r$padding\r                                done (%d atoms)\n", $#atoms + 1);
}


##############################################################
# Get a formatted date time
#
sub prettydate
{
    my ($sec, $min, $hrs, $day, $month, $year) = (localtime)[0,1,2,3,4,5];
    return(sprintf("%04d-%02d-%02d %02d:%02d:%02d\n", $year+1900, $month+1, $day, $hrs, $min, $sec));
}
