/* sjisctypetest.c v00.00.00.jmr
// Test frame for building my own version of character typing for sJIS.
// Perhaps this can be an example of how to handle character classification
// for variable width characters?
// Written by Joel Matthew Rees, beginning March 2000, Hyogo, Japan.
//   joel_rees@sannet.ne.jp
//
// Copyright 2000, 2001 Joel Matthew Rees.
//   All rights reserved.
//
// Assignment of Stewardship, or Terms of Use:
//
// The author grants permission to use and/or redistribute the code in this
// file, in either source or translated form, under the following conditions:
// 1. When redistributing the source code, the copyright notices and terms of
//    use must be neither removed nor modified.
// 2. When redistributing in a form not generally read by humans, the
//    copyright notices and terms of use, with proper indication of elements
//    covered, must be reproduced in the accompanying documentation and/or
//    other materials provided with the redistribution. In addition, if the
//    source includes statements designed to compile a copyright notice
//    into the output object code, the redistributor is required to take
//    such steps as necessary to preserve the notice in the translated
//    object code.
// 3. Modifications must be annotated, with attribution, including the name(s)
//    of the author(s) and the contributor(s) thereof, the conditions for
//    distribution of the modification, and full indication of the date(s)
//    and scope of the modification. Rights to the modification itself
//    shall necessarily be retained by the author(s) thereof.
// 4. These grants shall not be construed as an assignment or assumption of
//    liability of any sort or to any degree. Neither shall these grants be
//    construed as endorsement or represented as such. Any party using this
//    code in any way does so under the agreement to entirely indemnify the
//    author and any contributors concerning the code and any use thereof.
//    Specifically, THIS SOFTWARE IS PROVIDED AT NO COST, AS IT IS, WITHOUT
//    ANY EXPRESS OR IMPLIED WARRANTY OF ANY SORT, INCLUDING, BUT NOT LIMITED
//    TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
//    UNDER NO CIRCUMSTANCES SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
//    ANY DAMAGES WHATSOEVER ARISING FROM ITS USE OR MISUSE, EVEN IF ADVISED
//    OF THE EXISTENCE OF THE POSSIBILITY OF SUCH DAMAGE.
// 5. This code should not be used for any illegal or immoral purpose,
//    including, but not limited to, the theft of property or services,
//    deliberate communication of false information, the distribution of drugs
//    for purposes other than medical, the distribution of pornography, the
//    provision of illicit sexual services, the maintenance of oppressive
//    governments or organizations, or the imposture of false religion and
//    false science.
//    Any illegal or immoral use incurs natural and legal penalties, which the
//    author invokes in full force upon the heads of those who so use it.
// 6. Alternative redistribution arrangements:
//    a. If the above conditions are unacceptable, redistribution under the
//       following commonly used public licenses is expressly permitted:
//       i.   The GNU General Public License (GPL) of the Free Software
//            Foundation.
//       ii.  The Perl Artistic License, only as a part of Perl.
//       iii. The Apple Public Source License, only as a part of Darwin or
//            a Macintosh Operating System using Darwin.
//    b. No other alternative redistribution arrangement is permitted.
//       (The original author reserves the right to add to this list.)
//    c. When redistributing this code under an alternative license, the
//       specific license being invoked shall be noted immediately beneath
//       the body of the terms of use. The terms of the license so specified
//       shall apply only to the redistribution of the source so noted.
// 7. In no case shall the rights of the original author to the original work
//    be impaired by any distribution or redistribution arrangement.
//
// End of the Assignment of Stewardship, or terms of use.
//
// License invoked: Assignment of Stewardship.
// Notes concerning license:
//    Compiler directives are strongly encouraged as a means of meeting
//    the attribution requirements in the Assignment of Stewardship.
*/

/* Modification note (code review).
// NOTE(review): term 3 above asks for the modifier's name, the date and the
// distribution conditions; add them here before redistributing.
// Scope of the modification:
//   - Line structure restored (one directive / statement per line).
//   - <ctype.h> classifiers are only called with values representable as
//     unsigned char; other values are undefined behavior in ISO C.
//   - The usage text lists "-pcntrl" with its leading dash, and does not
//     dereference a missing argv[ 0 ].
//   - main() returns EXIT_FAILURE when the selected test counted errors.
//   - Explanatory comments added; the Japanese words in one comment are
//     given in English so the source stays plain ASCII.
*/

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>

#include "port.h"
#include "slowsjctype.h"
#include "sj16bitChars.h"
#include "sj8bitChars.h"


/* The test ranges are derived from the same sources mentioned in the
// comments of the source code.
// The structure of the tests is different.
// Two views can help to avoid errors (make sure I said what I thought I said).
*/


#define k_testStringWidth	8
#define E_incStringBoundaryError	0x800
#define k_incStringMaxPos	( k_testStringWidth - 1 )


/* Add inc to the byte at str[ pos ], treating str[ 0 ] .. str[ pos ] as a
// multi-byte counter whose most significant byte is str[ 0 ]: whatever
// overflows a byte is added to the byte before it, stopping at index 0.
// Returns E_incStringBoundaryError when pos is outside
// 0 .. k_incStringMaxPos (nothing is modified in that case); otherwise
// returns the carry left after the last byte updated (0 when it was absorbed).
*/
static int incString( char * str, int pos, int inc )
{
    ubyte * ustr = (ubyte *) str;
    int result = 0;
    int carry = inc;

    if ( pos < 0 || pos > k_incStringMaxPos )	/* firewall */
        return E_incStringBoundaryError;
    for ( ;; )
    {
        result = ustr[ pos ] + carry;
        ustr[ pos ] = (ubyte) result;	/* keep the low byte, */
        carry = result >> CHAR_BIT;	/* pass the rest leftward. */
        if ( carry == 0 || pos <= 0 )
            break;
        --pos;
    }
    return carry;
}


/* True when ch may be handed to a <ctype.h> classifier, that is, when it is
// representable as unsigned char. The tests below collect two-byte
// characters into values above UCHAR_MAX, and calling isspace() and friends
// with such values is undefined behavior.
*/
static int fitsCtypeArg( long ch )
{
    return ch >= 0 && ch <= UCHAR_MAX;
}


/* Run slowsjIsPOneByte() over every value 0x00 .. 0xff of the first byte.
// Expected true for 0x00 .. 0x7f and 0xa1 .. 0xdf, false elsewhere.
// showAll non-zero prints every row; otherwise only failing rows print.
// Returns the number of disagreements.
*/
static long testIsPOneByte( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 2 ] = test[ 3 ] = 0x9f;
    /* test[ 0 ] catches the carry out of test[ 1 ] and so ends the loop. */
    for ( test[ 0 ] = test[ 1 ] = 0; test[ 0 ] < 1; incString( test, 1, 1 ) )
    {
        int errant = 0;
        int iTest = (ubyte) test[ 1 ];
        int testResult = slowsjIsPOneByte( test + 1 );

        if ( iTest <= 0x7f )	/* DEL */
            errant = ( testResult == 0 );
        else if ( iTest < 0xa1 )	/* kuten */
            errant = ( testResult != 0 );
        else if ( iTest <= 0xdf )	/* handakuten */
            errant = ( testResult == 0 );
        else
            errant = ( testResult != 0 );
        if ( errant )
            ++errCt;
        if ( errant || showAll )
            printf( "0x%03x <%s> is %s a one byte character.\t%s\n",
                    iTest, test + 1, testResult ? "   " : "NOT",
                    errant ? "*ERROR*" : "" );
    }
    return errCt;
}


/* Run slowsjIsPHighByte() over every value 0x00 .. 0xff of the first byte.
// Expected true for 0x81 .. 0x9f and 0xe0 .. 0xfc, false elsewhere.
// showAll non-zero prints every row; otherwise only failing rows print.
// Returns the number of disagreements.
*/
static long testIsPHighByte( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 2 ] = test[ 3 ] = 0x9f;
    for ( test[ 0 ] = test[ 1 ] = 0; test[ 0 ] < 1; incString( test, 1, 1 ) )
    {
        int errant = 0;
        int iTest = (ubyte) test[ 1 ];
        int testResult = slowsjIsPHighByte( test + 1 );

        if ( iTest < 0x81 )	/* From symbols */
            errant = ( testResult != 0 );
        else if ( iTest <= 0x9f )	/* through level 1 and part of level 2; */
            errant = ( testResult == 0 );
        else if ( iTest < 0xe0 )	/*  half-width katakana; */
            errant = ( testResult != 0 );
        else if ( iTest <= 0xfc )	/* continuing with level 2 and reserved area. */
            errant = ( testResult == 0 );
        else	/* Defined as not character codes. */
            errant = ( testResult != 0 );
        if ( errant )
            ++errCt;
        if ( errant || showAll )
            printf( "0x%03x <%s> is %s a high byte.\t%s\n",
                    iTest, test + 1,
                    testResult ? "   " : "NOT",
                    errant ? "*ERROR*" : "" );
    }
    return errCt;
}


/* Run slowsjIsPLowByte() over every value 0x00 .. 0xff of the byte at
// test[ 1 ], with test[ 0 ] held at 0x9f in front of it.
// Expected true for 0x40 .. 0x7e and 0x80 .. 0xfc, false elsewhere.
// The row is printed from test[ 0 ], so the 0x9f byte is shown as well.
// showAll non-zero prints every row; otherwise only failing rows print.
// Returns the number of disagreements.
*/
static long testIsPLowByte( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 2 ] = test[ 3 ] = 0x9f;
    /* The loop ends when the carry out of test[ 1 ] bumps test[ 0 ] off 0x9f. */
    for ( test[ 0 ] = 0x9f, test[ 1 ] = 0; test[ 0 ] == '\x9f'; incString( test, 1, 1 ) )
    {
        int errant = 0;
        int iTest = (ubyte) test[ 1 ];
        int testResult = slowsjIsPLowByte( test + 1 );

        if ( iTest < 0x40 )	/* Defined as below range. */
            errant = ( testResult != 0 );
        else if ( iTest < 0x7f )	/* even half; */
            errant = ( testResult == 0 );
        else if ( iTest == 0x7f )	/* DEL character gap; */
            errant = ( testResult != 0 );
        else if ( iTest <= 0xfc )	/* odd half. */
            errant = ( testResult == 0 );
        else	/* Defined as not character codes. */
            errant = ( testResult != 0 );
        if ( errant )
            ++errCt;
        if ( errant || showAll )
            printf( "0x%03x <%s> is %s a low byte.\t%s\n",
                    iTest, test,
                    testResult ? "   " : "NOT",
                    errant ? "*ERROR*" : "" );
    }
    return errCt;
}


/* Run slowsjIsP7bit() over every value 0x00 .. 0xff of the first byte.
// Expected true below 0x80, false from 0x80 up.
// showAll non-zero prints every row; otherwise only failing rows print.
// Returns the number of disagreements.
*/
static long testIsP7bit( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 2 ] = test[ 3 ] = 0x9f;
    for ( test[ 0 ] = test[ 1 ] = 0; test[ 0 ] < 1; incString( test, 1, 1 ) )
    {
        int errant = 0;
        int iTest = (ubyte) test[ 1 ];
        int testResult = slowsjIsP7bit( test + 1 );

        if ( iTest < 0x80 )
            errant = ( testResult == 0 );
        else
            errant = ( testResult != 0 );
        if ( errant )
            ++errCt;
        if ( errant || showAll )
            printf( "0x%03x <%s> is %s a 7 bit character.\t%s\n",
                    iTest, test + 1,
                    testResult ? "   " : "NOT",
                    errant ? "*ERROR*" : "" );
    }
    return errCt;
}


/* If the above tests pass, this can be used in the rest of the tests.
// But check it against GuessCount, below, as well, first.
//
// Returns ( chp[ 0 ] << 8 ) + chp[ 1 ] when slowsjIsPHighByte() accepts
// chp[ 0 ] and slowsjIsPLowByte() accepts chp[ 1 ]; otherwise returns
// chp[ 0 ] alone. Both bytes are taken as unsigned.
*/
static long collectByGuess( char * chp )
{
    if ( slowsjIsPHighByte( chp ) && slowsjIsPLowByte( chp + 1 ) )
        return ( ( (ubyte) chp[ 0 ] ) << 8 ) + (ubyte) chp[ 1 ];
    else
        return (ubyte) chp[ 0 ];
}


/* I really, really wanted to be able to use the actual characters here!
//
// Run slowsjPGuessCount() over every two-byte combination 0x0000 .. 0xffff
// and cross-check three things per combination:
//   - the guessed length against the value collectByGuess() assembles
//     (2 above 0xff; 1 below 0x80 or within 0xa1 .. 0xdf; 0 otherwise),
//   - slowsjIsPOneByte() against a guessed length of 1,
//   - collectByGuess() against the ranges spelled out locally below.
// showAll == 2 prints every row; showAll == 1 prints rows that are not
// one-byte characters or whose second byte is 0x00 or 0xff; failing rows
// always print. Returns the number of combinations with any disagreement.
*/
static long testPGuessCount( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 3 ] = test[ 4 ] = 0x9f;
    for ( test[ 0 ] = test[ 1 ] = test[ 2 ] = 0;
          test[ 0 ] < 1;
          incString( test, 2, 1 ) )
    {
        int errant = 0;
        int iTest = slowsjPGuessCount( test + 1 );
        int iTest1Byte = slowsjIsPOneByte( test + 1 );
        int errant1Byte =  iTest1Byte && ( iTest != 1 );
        long testResult = collectByGuess( test + 1 );
        int collectCk = 0;
        int collectHi = (ubyte) test[ 1 ];
        int collectLo = (ubyte) test[ 2 ];
        long collectResult = collectHi;
        /* "Tokyo Tower" (written in Japanese in the original comment) for my son */

        /* Assemble a character. */
        if ( ( ( collectHi >= 0x81 && collectHi <= 0x9f )
               || ( collectHi >= 0xE0 && collectHi <= 0xfc ) )
             && ( collectLo >= 0x40 && collectLo != 0x7f && collectLo <= 0xfc ) )
        {
            collectResult = ( collectHi << CHAR_BIT ) + collectLo;
        }
        collectCk = ( testResult != collectResult );	/* Test the test. */

        if ( testResult > 0xff )
            errant = ( iTest != 2 );
        else if ( testResult < 0x80
                  || ( testResult >= 0xa1 && testResult <= 0xdf ) )
            errant = ( iTest != 1 );
        else
            errant = ( iTest != 0 );
        if ( errant || collectCk || errant1Byte )
            ++errCt;
        if ( errant || errant1Byte || collectCk || ( showAll == 2 )
             || ( showAll == 1 && ( !iTest1Byte || collectLo == 0 || collectLo == 0xff ) ) )
        {
            printf( "0x%03x%02x (0x%05lx) <%s>, length==%d.\t%s",
                    (ubyte) test[ 1 ], (ubyte) test[ 2 ],
                    testResult, test + 1, iTest,
                    errant ? "length *ERROR*" : "" );
            if ( iTest1Byte && ( iTest != 1 ) )
                printf( "\tone byte disagreement *ERROR*" );
            if ( collectCk )
                printf( "\tcollect *ERROR* (0x%05lx)", collectResult );
            putchar( '\n' );
        }
    }
    return errCt;
}


/* Guess count cleared: visually checked with RE search in CodeWarrior. JMR2001.05.23
// bool slowsjIsPOneByte(), slowsjIsPHighByte(), slowsjIsPLowByte(), and
// slowsjIsP7bit() all verified at this point. JMR2001.05.23
*/


/* verified JMR2001.05.24
//
// Run slowsjIsPCntrl() over every value 0x00 .. 0xff of the first byte.
// Expected true below 0x20 and at 0x7f, false elsewhere. Each printed row
// also shows what the C library's iscntrl() says, for comparison only.
// showAll non-zero prints every row; otherwise only failing rows print.
// Returns the number of disagreements.
*/
static long testIsPCntrl( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 2 ] = test[ 3 ] = 0x9f;
    for ( test[ 0 ] = test[ 1 ] = 0; test[ 0 ] < 1; incString( test, 1, 1 ) )
    {
        int errant = 0;
        int iTest = (ubyte) test[ 1 ];
        int testResult = slowsjIsPCntrl( test + 1 );

        if ( iTest < 0x20 || iTest == 0x7f )
            errant = ( testResult == 0 );
        else
            errant = ( testResult != 0 );
        if ( errant )
            ++errCt;
        if ( errant || showAll )
            printf( "0x%03x <%s> is %s a control character.\t%s\t%s\n",
                    iTest, test + 1,
                    testResult ? "   " : "NOT",
                    /* iTest is the unsigned byte value; a plain char could */
                    /* be negative here, which iscntrl() does not accept.   */
                    iscntrl( iTest ) ? "(ANSI C cntrl)" : "",
                    errant ? "*ERROR*" : "" );
    }
    return errCt;
}


/* verified JMR2001.05.25
// Mac Metrowerks C library isspace() returns true when the bottom byte is '\xca'. WHY?
// Tried isspace() on MS Windows Metrowerks and it does not return this.
// I guess it's a Mac thing?
// (Note that I am not so curious as to look it up.)
//
// NOTE(review): the values passed to isspace() at the time were the
// collected two-byte codes, which lie outside the range isspace() is defined
// for; that is presumably where the library-dependent answer came from.
// isspace() is now consulted for one-byte values only.
//
// Run slowsjIsPSpace() over every two-byte combination 0x0000 .. 0xffff.
// Expected 2 for the two-byte space 0x8140, 1 for the one-byte characters
// 0x09 .. 0x0d and 0x20, 0 otherwise (lengths as reported by
// slowsjPGuessCount()).
// Rows print on error, when showAll == 2, when the C library calls the
// one-byte value a space, or, for showAll == 1, on a selection of boundary
// rows. Returns the number of disagreements.
*/
static long testPSpace( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 3 ] = test[ 4 ] = 0x9f;
    for ( test[ 0 ] = test[ 1 ] = test[ 2 ] = 0;
          test[ 0 ] < 1;
          incString( test, 2, 1 ) )
    {
        int errant = 0;
        int iTest = slowsjIsPSpace( test + 1 );
        long guessedCt = slowsjPGuessCount( test + 1 );
        long collected = collectByGuess( test + 1 );
        /* Only one-byte values are valid arguments for isspace(). */
        int ansiSpace = fitsCtypeArg( collected ) && isspace( (int) collected );
        /* "Tokyo Tower" (written in Japanese in the original comment) for my son */

        if ( guessedCt == 2
             && ( test[ 1 ] == '\x81' && test[ 2 ] == '\x40' )	/* two-byte space */ )
            errant = ( iTest != 2 );
        else if ( guessedCt == 1
                  && ( test[ 1 ] == '\x09'		/* horizontal tab */
                       || test[ 1 ] == '\x0a'	/* line-feed/new-line */
                       || test[ 1 ] == '\x0b'	/* vertical tab */
                       || test[ 1 ] == '\x0c'	/* form feed */
                       || test[ 1 ] == '\x0d'	/* carriage return */
                       || test[ 1 ] == '\x20'	/* one-byte space */ ) )
            errant = ( iTest != 1 );
        else
            errant = ( iTest != 0 );
        if ( errant )
            ++errCt;
        if ( errant || ( showAll == 2 ) || ansiSpace
             || ( showAll == 1
                  && ( ( guessedCt == 1 && ( test[ 2 ] == '\00' || test[ 2 ] == '\xff' ) )
                       || ( guessedCt == 2 && ( collected < 0x8200 || iTest != 0 ) ) ) )
           )
        {
            printf( "0x%03x%02x (0x%05lx) <%s> is %s whitespace.\t%s\t%s\n",
                    (ubyte) test[ 1 ], (ubyte) test[ 2 ],
                    collected, test + 1,
                    ( iTest != 0 ) ? "   " : "NOT",
                    errant ? "*ERROR*" : "",
                    ansiSpace ? "ANSI C space" : "" );
        }
    }
    return errCt;
}


/* verified JMR2001.05.28
//
// Run slowsjIsPDigit() and slowsjIsPXDigit() over every two-byte
// combination 0x0000 .. 0xffff.
// Expected for digits: 2 for lead byte 0x82 with second byte 0x4f .. 0x58,
// 1 for one-byte '0' .. '9', 0 otherwise.
// Expected for hex digits: as above plus second bytes 0x60 .. 0x65 and
// 0x81 .. 0x86 after lead byte 0x82, and one-byte 'A' .. 'F', 'a' .. 'f'.
// Each printed row also shows what isdigit() / isxdigit() say about the
// value when it is a one-byte value (they are not defined for larger ones).
// Rows print on error, when showAll == 2, or, for showAll == 1, on a
// selection of boundary rows. Returns the number of combinations where
// either check disagreed.
*/
static long testPDigit( int showAll )
{
    char test[ k_testStringWidth ] = { 0 };
    long errCt = 0;

    test[ 3 ] = test[ 4 ] = 0x9f;
    for ( test[ 0 ] = test[ 1 ] = test[ 2 ] = 0;
          test[ 0 ] < 1;
          incString( test, 2, 1 ) )
    {
        int errant = 0;
        int errantHex = 0;
        int iTest = slowsjIsPDigit( test + 1 );
        int iTestHex = slowsjIsPXDigit( test + 1 );
        long guessedCt = slowsjPGuessCount( test + 1 );
        long collected = collectByGuess( test + 1 );
        /* Only one-byte values are valid arguments for the ctype calls. */
        int ctypeOk = fitsCtypeArg( collected );
        int ansiDigit = ctypeOk && isdigit( (int) collected );
        int ansiXDigit = ctypeOk && isxdigit( (int) collected );
        /* "Tokyo Tower" (written in Japanese in the original comment) for my son */

        if ( guessedCt == 2 && test[ 1 ] == '\x82'	/* lead byte */
             && ( (ubyte) test[ 2 ] >= 0x4f && (ubyte) test[ 2 ] <= 0x58 ) )	/* 0 - 9 */
            errant = ( iTest != 2 );
        else if ( guessedCt == 1
                  && ( (ubyte) test[ 1 ] >= '0' && (ubyte) test[ 1 ] <= '9' ) )
            errant = ( iTest != 1 );
        else
            errant = ( iTest != 0 );

        if ( guessedCt == 2 && test[ 1 ] == '\x82'	/* lead byte */
             && ( ( (ubyte) test[ 2 ] >= 0x4f && (ubyte) test[ 2 ] <= 0x58 )	/* 0 - 9 */
                  || ( (ubyte) test[ 2 ] >= 0x60 && (ubyte) test[ 2 ] <= 0x65 ) /* A - F */
                  || ( (ubyte) test[ 2 ] >= 0x81 && (ubyte) test[ 2 ] <= 0x86 ) /* a - f */
                ) )
            errantHex = ( iTestHex != 2 );
        else if ( guessedCt == 1
                  && ( ( test[ 1 ] >= '0' && test[ 1 ] <= '9' )
                       || ( test[ 1 ] >= 'A' && test[ 1 ] <= 'F' )
                       || ( test[ 1 ] >= 'a' && test[ 1 ] <= 'f' )
                     ) )
            errantHex = ( iTestHex != 1 );
        else
            errantHex = ( iTestHex != 0 );

        if ( errant || errantHex )
            ++errCt;
        if ( errant || errantHex || ( showAll == 2 )
             || ( showAll == 1
                  && ( ( guessedCt == 1 && ( test[ 2 ] == '\00' || test[ 2 ] == '\xff' ) )
                       || ( guessedCt == 2
                            && ( iTest != 0 || iTestHex != 0
                                 || ( collected >= 0x8200 && collected <= 0x82ff ) ) ) ) )
           )
        {
            printf( "0x%03x%02x (0x%05lx) <%s> is %3s a digit %7s %14s,",
                    (ubyte) test[ 1 ], (ubyte) test[ 2 ],
                    collected, test + 1,
                    ( iTest != 0 ) ? "" : "NOT",
                    errant ? "*ERROR*" : "",
                    ansiDigit ? "(ANSI C digit)" : "" );
            printf( "\t\tis %3s a hex digit %7s %14s\n",
                    ( iTestHex != 0 ) ? "" : "NOT",
                    errantHex ? "*ERROR*" : "",
                    ansiXDigit ? "(ANSI C xdigit)" : "" );
        }
    }
    return errCt;
}


/** This one needs the error counting stuff! */
/* Print what slowsjPToLower() produces, first for every one-byte value
// 0x00 .. 0xff (remaining bytes zero), then for every two-byte combination
// 0x0000 .. 0xffff. Nothing is checked; the output is for reading.
*/
static void testcasing( void )
{
    char test[ k_testStringWidth ] = { 0 };
    char targ[ k_testStringWidth ] = { 0 };
    int count;

    for ( test[ 0 ] = test[ 1 ] = 0; test[ 0 ] < 1; incString( test, 1, 1 ) )
    {
        count = slowsjPToLower( test + 1, targ );
        printf( "0x%02x <%s> \tlower(%d) \t=> <%s>\n",
                (ubyte) test[ 1 ], test + 1, count, targ );
        targ[ 1 ] = '\0';
    }
    for ( test[ 0 ] = test[ 1 ] = test[ 2 ] = 0;
          test[ 0 ] < 1;
          incString( test, 2, 1 ) )
    {
        count = slowsjPToLower( test + 1, targ );
        printf( "0x%02x%02x <%s> \tlower(%d) \t=> <%s>\n",
                (ubyte) test[ 1 ], (ubyte) test[ 2 ], test + 1, count, targ );
        targ[ 1 ] = targ[ 2 ] = '\0';
    }
}


/* Command line: -<test> [ -all | -interesting ]
// The test name is matched on its leading characters only (see the
// strncmp() lengths below). A second argument beginning with "-a" selects
// showAll == 2; any other second argument selects showAll == 1.
// Returns EXIT_FAILURE when the selected test counted at least one error,
// EXIT_SUCCESS otherwise (including the usage and "No test specified." paths).
*/
int main( int argc, char * argv[] )
{
    long errorCt = 0;
    int showAll = 0;

    commandLine( &argc, &argv );
    if ( argc < 2 || argv[ 1 ][ 0 ] != '-' )
    {
        /* argv[ 0 ] is not guaranteed to exist when argc is 0. */
        printf( "%s: -<test> [ -all | -interesting ]\n",
                ( argc > 0 && argv[ 0 ] != NULL ) ? argv[ 0 ] : "sjisctypetest" );
        puts( "tests:" );
        puts( "\t-p1byte | -p7bit" );
        puts( "\t-phibyte | -plobyte" );
        puts( "\t-pguesscount" );
        puts( "\t-pspace | -pcntrl" );
        puts( "\t-pdigit" );
        puts( "\t-p2upcase | -p2locase" );
        return EXIT_SUCCESS;
    }
    if ( argc > 2 )
    {
        showAll = ( strncmp( argv[ 2 ], "-all", 2 ) == 0 ) ? 2 : 1;
    }
/* */
    if ( strncmp( argv[ 1 ] + 1, "p1byte", 2 ) == 0 )
        errorCt = testIsPOneByte( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "phibyte", 4 ) == 0 )
        errorCt = testIsPHighByte( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "plobyte", 4 ) == 0 )
        errorCt = testIsPLowByte( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "p7bit", 2 ) == 0 )
        errorCt = testIsP7bit( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "pguesscount", 2 ) == 0 )
        errorCt = testPGuessCount( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "pcntrl", 2 ) == 0 )
        errorCt = testIsPCntrl( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "pspace", 2 ) == 0 )
        errorCt = testPSpace( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "pdigit", 2 ) == 0 )
        errorCt = testPDigit( showAll );
    else if ( strncmp( argv[ 1 ] + 1, "p2locase", 4 ) == 0 )
        testcasing();
    else
        puts( "No test specified." );
    printf( "%s \t %ld errors!\n",
            ( errorCt > 0 ) ? "*******" : "", errorCt );
    /* Let scripts see a failed run through the exit status. */
    return ( errorCt > 0 ) ? EXIT_FAILURE : EXIT_SUCCESS;
}