#!/usr/bin/perl
#
# pp -- C PreProcessor resolver.
#
# Script to recursively track down C macro definitions.
#  Very useful for Perl's source code ;)
#
# Copyright (c) 2000-2011 Alex Davies. All rights reserved. This program
# is free software; you can redistribute it and/or modify it under the
# same terms as Perl itself.
#

# [Mar2011] Releasing to CPAN (just in case my hard drive packs in ;-)
# I wrote the first version of this more than 10 years ago. It's grown
# to cover more cases and code styles. It's targeted at the perl source
# code, but should work reasonably well on many others too. YMMV.

use warnings;
use strict;
use Term::ANSIColor;
require Win32::Console::ANSI if $^O eq 'MSWin32';

our $VERSION = '0.01';

${^WIN32_SLOPPY_STAT} = 1;
$| = 1;

my $redirected_STDOUT = ! -t STDOUT;

my $reset_col   = $redirected_STDOUT ? '' : Term::ANSIColor::color("reset");
my $comment_col = $redirected_STDOUT ? '' : Term::ANSIColor::color("reset bold green");
my $macro_col   = $redirected_STDOUT ? '' : Term::ANSIColor::color("reset bold yellow");

$SIG{INT} = sub { exit };

END { close STDOUT };

################################################################################

my $usage = <<"EOT";
Usage: pp [-a|NUM] [-r] [-v] [-n] macro [dir ...]

Recursively expand C macros etc.

	-a|NUM  = show all results, or only upto NUM depth
	-r      = search recursively for C files to work on
	-v      = verbose/debug output
	-n      = no pager
EOT

# NB. It is a little exuberant... make the default action ignore
#  certain 'understood' macros, but add '-a' option to give them _all_.
#
my %filter = map {$_ => 1} qw(
	STMT_START STMT_END STRLEN int void VOID CONST U32 U16 U8 I32 I16 I8
	bool TRUE FALSE struct register EXT const CONST INT LONG SHORT
	BYTE ULONG UINT UBYTE CHR signed WORD STATIC
	IV UV NV NULL
	aTHX_ pTHX_ vTHX_ SV
	aTHX  pTHX  vTHX
	EXTERN_C
	PERLVAR
	__attribute__ inline __used
);

my $max_depth = 5; # default
my $all;
my $verbose;
my $max_depth_set;
my $recursive;
my $cache_file = "./.ppres_cache.pl";
my $pager = -t STDOUT;

while (@ARGV) {
	if ($ARGV[0] eq '-a') {
		$all = 1;
	} elsif ($ARGV[0] eq '-r') {
		$recursive = 1;
	} elsif ($ARGV[0] eq '-n') {
		$pager = 0;
	} elsif ($ARGV[0] eq '-v') {
		$verbose = 1;
	} elsif ($ARGV[0] =~ /^\d+$/ and @ARGV > 1) {
		$max_depth = shift @ARGV;
		$max_depth_set = 1;
		next;
	} else {
		last;
	}
	shift @ARGV;
}

@ARGV or die $usage;

my $macro = shift @ARGV;

if ($all) { # or exists $filter{$macro}
	%filter = ();
	$max_depth = undef unless $max_depth_set
}

################################################################################

if ($pager) {
	# Pipe all output thro' a pager to ease viewing.
	my $less;
	foreach my $f ("c:/cygwin/bin/less.exe", "/usr/bin/less") {
		if (-x $f) {
			$less = $f;
			last;
		}
	}
	$less or die "no pager!";
	open(PAGER_OUT, '|-', "$less -mFRX")
		or die "pp: unable to pipe STDOUT via less\n";
	*STDOUT = \*PAGER_OUT;
	*STDERR = \*PAGER_OUT;
}

################################################################################

# Speed up. Cache data. Only use cache if it is newer than all the source files.
my $use_cache = 0;

#if (-f $cache_file) { # no need to check if it doesn't exist.
#	my $cache_file_mtime = (stat $cache_file)[9];
#	# NB. $0 may be "pp.pl" - so use __FILE__
#	if ($cache_file_mtime > (stat __FILE__)[9]) { # if this script is modified then regenerate
#		$use_cache = 1;
#		$cache_file_mtime += 24*60*60; # only regenerate once a day.
#		foreach my $f (@files) {
#			my $mtime = (stat $f)[9];
#			if ($mtime > $cache_file_mtime) {
#				$use_cache = 0;
#				last;
#			}
#		}
#	}
#}

$use_cache = -e $cache_file; # user can delete existing cache file to regen it.

my %apidoc = (); # Perl source code specific
my %defs = (); # C definitions
my %structs; # all struct/unions

if ($use_cache) {
	my $data = do $cache_file;
	die "bad cache file '$cache_file': $@\n" if $! or $@ or !defined $data;
	%apidoc  = %{ $data->{apidoc} };
	%defs    = %{ $data->{defs} };
	%structs = %{ $data->{structs} };
} else {

################################################################################

print STDERR "Getting C files\n";

# Get all the C files. NB. #define's can be in *.c files aswell!
my @files = ();
my @dirs = @ARGV ? @ARGV : '.';

my $c_file_pat = "(?i)\\.(2?h|c(pp)?)\$";

foreach my $dir (@dirs) {
	$dir =~ s/\\/\//g;
	$dir =~ s/\/\/+/\//g;
	$dir =~ s/\/$//;

	if ($recursive) {
		require File::Find;
		File::Find::find(sub {
			my $filename = $_;
			my $fullpath = $File::Find::name;
			if ($filename =~ /$c_file_pat/) {
				$fullpath =~ s/\\/\//g;
				push @files, $fullpath;
			}

		}, $dir);
	} else {
		unless (opendir DIR, $dir) {
			warn "pp: could not opendir '$dir' -- $!\n";
			next;
		}
		push @files, map {$dir eq '.' ? $_ : "$dir/$_"} grep /$c_file_pat/, readdir DIR;
		closedir DIR;
	}
}

die "pp: failed to find any C files\n" unless (@files or -e $cache_file);

for (@files) { s|^\./|| } # strip redundant leading "./" from filenames.

################################################################################

print STDERR "Reading C files\n";

# Read each file looking for relevent code snippets.
foreach my $file (@files) {
	print "pp: reading $file\n" if $verbose;
	if (!open(FIN, "<$file")) {
		warn "pp: can't open $file -- $!\n";
		next;
	}
	my ($continued_line, $original_line);
	my $previous_line = '';
	while (<FIN>) {
		$original_line = $_;

		s/^\s*//;

		# Fix for "struct foo\n{\n..."
		if ($continued_line) {
			$_ = "$continued_line$_";
			$continued_line = undef;
		} elsif (/^(typedef\s+)?(struct|union|enum)(\s+\w+)?\s*$/) {
			$continued_line = $_;
			next;
		}

		# Fix for Tcl func defs split over multiple lines.
		#  ... and for Perl funcs eg. Perl_gv_fetchpvn_flags
		if ($original_line =~ /^(\w+)\((.*,)?$/ and $1 !~ /^(do|if|while|switch|JS_\w+_API)$/) {
			my $func_def = $original_line;
			while (<FIN>) {
				$func_def .= $_;
				my $code_part = $_;
				next if $code_part =~ /^\s*\*/; # continued multiline C comment XXX
				$code_part =~ s|/\*.*?\*/||g;   # /* ... */
				$code_part =~ s|//.*||;         # // ...
				last if $code_part =~ /\)/;
			}
			$original_line = $func_def;
		}

reparse_line:

#print "** $original_line\n" if $verbose;

		if (/^#\s*define\s+(\w+)/) {
			my $m = $1;
			my $def = $_;
			my $lineno = $.;
			my $cont_line;
			$def =~ s/^#\s+/#/;
			if (/\\$/) {
				$cont_line = $_;
				while (<FIN>) {
					$def .= $_;
					last unless /\\$/;
				}
			}
			push @{$defs{$m}}, [$def, $file, $lineno];

			if ($cont_line) {
				$cont_line =~ s/\015?\012\z//; # chomp
				# Handle _XPV_HEAD etc.
				# Grab all struct members.
				my @lines = split /\n/, $def;
				foreach my $line (@lines) {
					my $mem_def = $line;
					$line =~ s|/\*.*?\*/||;
					$line =~ s|/\*.*||;
					if ($line =~ /(\w+);/) {
						my $mem = $1;
						# Don't save full typedef.
						push @{$structs{$mem}}, [$mem_def, $cont_line, $file, $lineno];
					}
				}
			}
		}
		# Simple typedef's eg. "typedef int INT;"
		elsif (/^typedef\b.*\b([_a-zA-Z]\w+)\s*;/) {
			push @{$defs{$1}}, [$_, $file, $.];
		}
		# Simple 'function' typedef's eg. "typedef char * (fptr*)(int, char);"
		elsif (/^typedef\s+.*\(\W*(\w+)\W*\)\s*\(.*\)\s*;/) {
			push @{$defs{$1}}, [$_, $file, $.];
		}
# TODO
#		# Handle enums of the form:
#		#  enum { \n E_ONE, \n E_TWO, \n E_THREE ... }
#		elsif (/^enum\s+\{/ and !/\}/) {
#
#
#		}
		# Special case for struct and union's.
		elsif (/^(typedef\s+)?(struct|union|enum)\s+((\w+)\s*)?\{/) {
			my $m1 = $4;
			my $def = $_;
			my $lineno = $.;
			my $is_struct = ($2 =~ /struct|union/);
			my $m2;
			while (<FIN>) {
				$def .= $_;
				# NB. assume closing '}' tied to start of line
				#  (can have struct's within struct's!)
				if (/^\}(\s+(\w+))?/) {
					$m2 = $2;
					last;
				}
			}
			if ($m1 or $m2) {
				push @{$defs{$m1}}, [$def, $file, $lineno] if $m1;
				push @{$defs{$m2}}, [$def, $file, $lineno] if ($m2 and not ($m1 and $m1 eq $m2));
				if ($is_struct) {
					# Grab all struct members.
					my @lines = split /\n/, $def;
					foreach my $line (@lines) {
						my $mem_def = $line;
						$line =~ s|/\*.*?\*/||;
						$line =~ s|/\*.*||;
						if ($line =~ /(\w+);/) {
							my $mem = $1;
							# Don't save full typedef.
							my $label = "struct " . ($m1 || $m2) . "{";
							push @{$structs{$mem}}, [$mem_def, $label, $file, $lineno];
						}
					}
				}
			} else {
				# This occurs when a struct is not nicely indented.
				#  eg. utbuf in doio.c

				#warn ">"x60, "\n\n", $def, "\n\n";
			}
		}
		# Grab any Perl source code documentation
		elsif (/^=for apidoc [^\|]*\|[^\|]*\|(\w+)/
		    || /^=for apidoc (\w+)\s*$/) {
			my $m = $1;
			my $doc = $_;
			my $reparse_line;
			while (<FIN>) {
				next if /^\s*$/;
				if (/^=/) { # and need to reparse line
					$reparse_line = $_;
					last;
				}
				last if /^\s*\*\//;
				$doc .= $_;
			}
			if (exists $apidoc{$m}) {
				warn "found repeated apidoc for '$m'\n";
			}
			$apidoc{$m} = $doc;
			if ($reparse_line) {
				$_ = $reparse_line;
				goto reparse_line;
			}
		}
		# Grab function definition prototype
		elsif ($original_line =~ /^(\w+)\(.*\)\s*(?:(?:\/\*|\/\/|\{).*)?$/ms
				and $1 !~ /^JS_\w+_API$/ # this is the return type!
		) {
			my $func = $1;
			my $def = $previous_line . $original_line;
			# Fix for some Perl #define's that look like function defn's but aren't
			if ($original_line =~ /^(PERLVAR\w*|XS|PERL_PPDEF|PP)\((\w+)/) {
				$func = $2;
				$def = $original_line;
			}
			push @{$defs{$func}}, [$def, $file, $.];
		}

		$previous_line = $_;
	}
	close FIN;
}

} # End of acquiring data.

################################################################################

if (exists $defs{$macro}) {
	show_mac($macro, 1);
} else {
	print STDERR "pp: failed to find definition for $macro\n";
}

unless ($use_cache) { # Cache the data if we've had to refresh it
	print STDERR "Dumping data to $cache_file\n";
	require Data::Dumper;
	open FOUT, ">$cache_file" or die "cannot write to $cache_file: $!\n";
	local $Data::Dumper::Sortkeys = 1;
	local $Data::Dumper::Indent = 0;
	my %cache = (
		'defs'    => \%defs,
		'apidoc'  => \%apidoc,
		'structs' => \%structs,
	);
	my $data = Data::Dumper->Dump([\%cache], ['data']);
	print FOUT "return my $data\n";
	close FOUT;
}

exit;

################################################################################

{
my %done = ();

# Recursively display definitions for a 'name'.
sub show_mac {
	my ($mac, $depth) = @_;

	if (defined $max_depth and $depth > $max_depth) {
		print "** exceeded max depth ($max_depth): $mac\n" if $verbose;
		return;
	}
	if ($done{$mac}++) {
		print "** already printed definition: $mac\n" if $verbose;
		return;
	}
	if (!exists $defs{$mac}) {
		print "** no definition found for: $mac\n" if $verbose;
		return;
	}

	foreach my $def_aref (@{$defs{$mac}}) {
		my ($def, $file, $lineno) = @$def_aref;
		# Highlight the macro name in #define's:
		my $colored_def = $def;
		$colored_def =~ s/\b$mac\b/ $macro_col . $mac . $reset_col /e;
		print $comment_col, "/* ($depth) $file $lineno */", $reset_col, "\n", $colored_def, "\n";
		if (exists $apidoc{$mac}) {
			print $apidoc{$mac}, "\n";
			delete $apidoc{$mac}; # avoid repetitions of the same apidoc!
		}
		##
		# If the definition looks like a struct member eg.
		#         #define foo(x) (x)->bar
		#  ...then search for any bar's in the struct definitions.
		if ($def =~ /^\s*\#\s*define\s+\w+\(\s*(\w+)\s*\)\s+.*\b\1[\s\)]*->(\w+)/) {
			my $member = $2;
			print "** found struct member $member: $def\n" if $verbose;
			foreach my $mem (@{$structs{$member}}) {
				my ($m_line, $m_struct, $m_file, $m_lineno) = @$mem;
				my $m_depth = $depth+1;
				print $comment_col, "/* ($m_depth) $m_file $m_lineno */", $reset_col, "\n";
				print $m_struct, "\n";
				my $colored_line = $m_line;
				$colored_line =~ s/\b$member\b/ $macro_col . $member . $reset_col /e;
				print $colored_line, "\n\n";
				# recurse on the member types.
				if ($m_line =~ /^(.*?)\s+\w+;/) {
					my $type_decl = $1;
					print "** found type for member $member: $type_decl\n" if $verbose;
					my @types = split /[\s\*]+/, $type_decl;
					foreach my $type (@types) {
						show_mac($type, $m_depth+1);
					}
				}
			}
		}
		#
		##

		my @words = split_def($def);
		foreach my $word (@words) {
			show_mac($word, $depth+1);
		}
	}
}
}

################################################################################

# Extract 'words' from a C statement to recursively find definitions for.
sub split_def {
	my $def = shift;
	#print "DBG: def=$def\n";
	my $is_macro_def = ($def =~ s/^#\s*define\s+//);
	$def =~ s/\/\*.*?\*\///sg;    # remove C comments
	$def =~ s/"(\\"|[^"])*"//g;   # remove strings
	$def =~ s|/\*.*||s;           # remove a trailing C comment (which carries onto next line)
	$def =~ s/^(typedef\s+)?((struct|union|enum)\s+)?//;   #

	# Grab the words.
	my @words = ($def =~ /\b([_a-zA-Z]\w+)/g);
	my %seen;
	@words = grep !$seen{$_}++, @words;

	if ($is_macro_def) {
		# Also need to remove any macro arguments. eg. #define M(a,b) ...
		if ($def =~ s/^\w+\(//) {
			if ($def =~ s/^(.*?)\)//) {
				my @params = split /\s*,\s*/, $1;
				#print "DBG: params=@params\n";
				my %params = map { $_ => 1 } @params;
				@words = grep !$params{$_}, @words;
			}
		}
	}

	# Check against 'filter' list.
	if (keys %filter) {
		my @filtered_words = ();
		foreach (@words) {
			if ($filter{$_}) {
				print "** ignoring '$_'\n" if $verbose;
			} else {
				push @filtered_words, $_;
			}
		}
		@words = @filtered_words;
	}

	#print "DBG: words=@words\n";
	return @words;
}

################################################################################

# Avoid "used only once" warning:
1 or ($File::Find::name, $Data::Dumper::Sortkeys, $Data::Dumper::Indent);