chapter 10 - character strings

61
Chapter 10 - Character Strings

Upload: callie-rice

Post on 31-Dec-2015

44 views

Category:

Documents


1 download

DESCRIPTION

Chapter 10 - Character Strings. Array of Characters. char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’ };. Program 10.1. /* * Function to concatenate two character strings */ #include concat (char result[], char str1[], int n1, char str2[], int n2) { int i; - PowerPoint PPT Presentation

TRANSCRIPT

Chapter 10 - Character Strings

Array of Characters

char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’ };

word[0] 'H'
word[1] 'e'
word[2] 'l'
word[3] 'l'
word[4] 'o'
word[5] '!'

Program 10.1/* * Function to concatenate two character strings */#include <stdio.h>

/*
 * Concatenate two counted character arrays.
 *
 *   result - destination; must have room for at least n1 + n2 characters
 *   str1   - first source array, n1 characters long
 *   str2   - second source array, n2 characters long
 *
 * No null terminator is read or written: the lengths are given explicitly.
 */
void concat (char result[], const char str1[], int n1, const char str2[], int n2)
{
    int i;

    /* copy str1 to result */
    for (i = 0; i < n1; i++)
        result[i] = str1[i];

    /* copy str2 to result, immediately after the n1 characters of str1 */
    for (i = 0; i < n2; i++)
        result[n1 + i] = str2[i];
}

/* Program 10.1 (continued) */

/*
 * Joins the 5-character array s1 and the 6-character array s2 into s3
 * with concat, then prints the 11 resulting characters one at a time
 * (s3 has no null terminator, so %s cannot be used).
 */
int main (void)
{
    char s1[5] = {'T', 'e', 's', 't', ' '};
    char s2[6] = {'w', 'o', 'r', 'k', 's', '.'};
    char s3[11];
    int i;

    concat (s3, s1, 5, s2, 6);

    for (i = 0; i < 11; i++)
        printf ("%c", s3[i]);
    printf ("\n");

    system ("PAUSE");
    return 0;
}

Program 10.1 Output

Variable Length Character Strings

word[0] 'H'
word[1] 'e'
word[2] 'l'
word[3] 'l'
word[4] 'o'
word[5] '!'
word[6] '\0'

char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’, ‘\0’ };

NULL Character Terminates a String

Program 10.2/* * Function to count the number of characters in a string */#include <stdio.h>

/*
 * Count the characters that come before the terminating null
 * character of string and return that count.
 */
int string_length (char string[])
{
    int length;

    /* advance until the terminator is reached; the loop body is empty */
    for (length = 0; string[length] != '\0'; ++length)
        ;

    return length;
}

/* Program 10.2 (continued) */

/*
 * Prints the lengths of three null-terminated words as reported by
 * string_length.
 */
int main (void)
{
    char word1[] = {'a', 's', 't', 'e', 'r', '\0'};
    char word2[] = {'a', 't', '\0'};
    char word3[] = {'a', 'w', 'e', '\0'};

    printf ("%i %i %i\n", string_length (word1),
            string_length (word2), string_length (word3));

    system ("PAUSE");
    return 0;
}

Program 10.2 Output

Initializing and Displaying Character Strings

char word[] = “Hello!”;

char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’, ‘\0’ };

char word[7] = “Hello!”;

char word[6] = “Hello!”;

These Statements Are Equivalent

Leave Room for the NULL Character

Program 10.3/* * Function to concatenate two character strings */#include <stdio.h>

/*
 * Concatenate two null-terminated strings.
 *
 *   result - destination; must have room for every character of str1
 *            and str2 plus one terminating null character
 *   str1   - first source string
 *   str2   - second source string
 *
 * On return, result holds str1 followed by str2 and is null-terminated.
 */
void concat (char result[], const char str1[], const char str2[])
{
    int i;
    int j;

    /* copy str1 to result */
    for (i = 0; str1[i] != '\0'; i++)
        result[i] = str1[i];

    /* copy str2 to result, starting where str1 ended */
    for (j = 0; str2[j] != '\0'; j++)
        result[i + j] = str2[j];

    /* neither loop copies a terminator, so add one explicitly */
    result[i + j] = '\0';
}

/* Program 10.3 (continued) */

/*
 * Joins two null-terminated strings with concat and prints the result
 * with %s, which works because concat terminates s3.
 */
int main (void)
{
    char s1[] = "Test ";
    char s2[] = "works.";
    char s3[20];

    concat (s3, s1, s2);
    printf ("%s\n", s3);

    system ("PAUSE");
    return 0;
}

Program 10.3 Output

Testing Two Character Strings for Equality

Since the C Programming Language does not support a data type of string we cannot directly test two strings to see if they are equal with a statement such as

if ( string1 == string2 )

<string.h>

Program 10.4/* * Function to determine if two strings are equal */#include <stdio.h>#define TRUE 1#define FALSE 0

/*
 * Determine whether two null-terminated strings are equal.
 *
 * Returns 1 (TRUE) when s1 and s2 contain the same characters and end
 * at the same position, 0 (FALSE) otherwise.  The parameters are
 * const-qualified because the strings are only read, so string
 * literals and read-only data can be passed directly.
 */
int equal_strings (const char s1[], const char s2[])
{
    int i = 0;

    /*
     * While the characters match, s1[i] != '\0' also implies
     * s2[i] != '\0', so one terminator test is enough.
     */
    while (s1[i] == s2[i] && s1[i] != '\0')
        i++;

    /* equal only if both strings ended at the same index */
    return (s1[i] == '\0') && (s2[i] == '\0');
}

/* Program 10.4 (continued) */

/*
 * Prints the result of equal_strings for an unequal pair, a string
 * compared with itself, and an array compared with an equal literal.
 */
int main (void)
{
    char stra[] = "string compare test";
    char strb[] = "string";

    printf ("%i\n", equal_strings (stra, strb));
    printf ("%i\n", equal_strings (stra, stra));
    printf ("%i\n", equal_strings (strb, "string"));

    system ("PAUSE");
    return 0;
}

Program 10.4 Output

Inputting Character Strings

char string[81];

scanf ( “%s”, string);

char s1[81], s2[81], s3[81];

scanf ( “%s%s%s”, s1, s2, s3);

Program 10.5/* * Program to illustrate the %s scanf format characters */#include <stdio.h>

/*
 * Reads three whitespace-delimited words with the %s scanf format and
 * echoes them.
 *
 * Each conversion is limited to 80 characters so that a long word
 * cannot overflow its 81-byte buffer (80 characters plus the null
 * terminator).  The buffers start out empty so that printing them is
 * well defined even if fewer than three words are read.
 */
int main (void)
{
    char s1[81] = "";
    char s2[81] = "";
    char s3[81] = "";

    printf ("Enter text:\n");
    scanf ("%80s%80s%80s", s1, s2, s3);
    printf ("\ns1 = %s\ns2 = %s\ns3 = %s\n", s1, s2, s3);

    system ("PAUSE");
    return 0;
}

Program 10.5 Output

Program 10.6/* * Function to read a line of text from a terminal */#include <stdio.h>

/*
 * Read one line of text from standard input into buffer.
 *
 * Characters are stored up to, but not including, the newline, and the
 * result is null-terminated.  Reading also stops at end of input, so
 * the function cannot loop forever when input runs out.
 *
 * The function is not told the size of buffer; the caller must supply
 * an array large enough for the longest expected line plus terminator.
 */
void read_line (char buffer[])
{
    int character;      /* int, not char, so that EOF can be told apart */
    int i = 0;

    while ((character = getchar ()) != EOF && character != '\n')
        buffer[i++] = (char) character;

    buffer[i] = '\0';
}

/* Program 10.6 (continued) */

/*
 * Reads three lines with read_line and echoes each one followed by a
 * blank line.
 */
int main (void)
{
    char line[81];
    int i;

    for (i = 0; i < 3; i++) {
        read_line (line);
        printf ("%s\n\n", line);
    }

    system ("PAUSE");
    return 0;
}

Program 10.6 Output

Program 10.7#include <stdio.h>#define TRUE 1#define FALSE 0

/*
 * Function to determine if a character is alphabetic
 *
 * Returns 1 (TRUE) when c lies in 'a'..'z' or 'A'..'Z', and 0 (FALSE)
 * otherwise.  The return type is spelled out because implicit int is
 * no longer part of the C language.
 */
int alphabetic (char c)
{
    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}

Program 10.7 (continued)/* * Function to count the number of words in a string */count_words (char string[]){ int looking_for_word = TRUE; int word_count = 0; int i; for (i=0; string[i] != '\0'; i++) { if (alphabetic(string[i])) { if (looking_for_word) { word_count++; looking_for_word = FALSE; } } else looking_for_word = TRUE; } return (word_count); }

/* Program 10.7 (continued) */

/*
 * Prints two sample sentences, each followed by the word count that
 * count_words reports for it.
 */
int main (void)
{
    char text1[] = "Well, here goes.";
    char text2[] = "And here we go... again.";

    printf ("%s - words = %i\n", text1, count_words (text1));
    printf ("%s - words = %i\n", text2, count_words (text2));

    system ("PAUSE");
    return 0;
}

Program 10.7 Output

Execution of count_words Function

 i    string[i]   word_count   looking_for_word
                      0               1
 0      'W'           1               0
 1      'e'           1               0
 2      'l'           1               0
 3      'l'           1               0
 4      ','           1               1
 5      ' '           1               1
 6      'h'           2               0
 7      'e'           2               0
 8      'r'           2               0
 9      'e'           2               0
10      ' '           2               1
11      'g'           3               0
12      'o'           3               0
13      'e'           3               0
14      's'           3               0
15      '.'           3               1
16      '\0'          3               1

The NULL String

A Character String that contains no characters other than the NULL Character has a special name in the C Programming Language: it is called the NULL String. Its string length is correctly reported as 0.

char buffer[100] = “”;

Program 10.8#include <stdio.h>#define TRUE 1#define FALSE 0

/***** Insert alphabetic function here *****/

/***** Insert read_line function here *****/

/***** Insert count_words function here *****/

Program 10.8 (continued)main(){ char text[81]; int end_of_text = 0; int total_words = 0; printf ("Type in your text.\n"); printf ("When you are done, press 'RETURN'.\n\n"); while (!end_of_text) { read_line (text); if (text[0] == '\0') end_of_text = TRUE; else total_words += count_words (text); } printf("\nThere are %i words in the above text.\n", total_words); system ("PAUSE");}

Program 10.8 Output

Escape Characters Escape Character Name

\a audible alert

\b backspace

\f form feed

\n newline

\r carriage return

\t horizontal tab

\v vertical tab

\\ backslash

\" double quote

\' single quote

\? question mark

\nnn octal character value nnn

\xnn hexadecimal character value nn

More on Constant StringsIf you put a backslash character at the very end of the line and followed it immediately by a carriage return, it will tell the C Compiler to ignore the end of line. This line continuation technique is used primarily for continuing long constant character strings.

char letters[] = "abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ";

An even easier way of breaking up long character strings is to divide them into two or more adjacent strings.

char letters[] = “abcdefghijklmnopqrstuvwxyz” “ABCDEFGHIJKLMNOPQRSTUVWXYZ”;

Character Strings, Structures, and Arrays

Suppose we wanted to write a computer program that acted as a dictionary. One of the first thoughts would be how to represent the word and its definition. Since the word and its definition are logically related, the notion of a structure comes immediately to mind.

/* One dictionary record: a word and its definition, each stored as a
   character array (10 and 50 characters respectively, which must
   include the terminating null when used as strings). */
struct entry { char word[10]; char definition[50]; };

/* A dictionary with room for 100 such records. */
struct entry dictionary[100];

Program 10.9/* * Dictionary lookup program */#include <stdio.h>#define TRUE 1#define FALSE 0

/* One dictionary record: word holds the entry's word and definition
   its meaning; each array must also hold the terminating null. */
struct entry{ char word[10]; char definition[50];};

/***** Insert equal_strings function here *****/

Program 10.9/* * Function to lookup a word inside a dictionary */int lookup (struct entry dictionary[], char search [], int entries){ int i;

for (i=0; i < entries; i++) if (equal_strings (search, dictionary[i].word)) return (i); return (-1);}

Program 10.9main(){ struct entry dictionary[100] = { { "aardvark", "a burrowing African mammal" }, { "abyss", "a bottomless pit" }, { "acumen", "mentally sharp; keen" }, { "addle", "to become confused" }, { "aerie", "a high nest" }, { "affix", "to append; attach" }, { "agar", "a jelly made from seaweed" }, { "ahoy", "a nautical call of greeting" }, { "aigrette", "an ornamental cluster of feathers" }, { "ajar", "partially opened" } }; char word[10]; int entries = 10; int entry_number;

Program 10.9 printf("Enter word: "); scanf("%9s", word); entry_number = lookup (dictionary, word, entries); if (entry_number != -1) printf ("%s\n", dictionary[entry_number].definition); else printf ("Sorry, that word is not in my dictionary.\n"); system ("PAUSE");}

Program 10.9 Output

A Better Search Method

Step 1: Set low to 0, high to n – 1

Step 2: If low > high, x does not exist in M and the algorithm terminates

Step 3: Set mid to (low + high) / 2

Step 4: if M[mid] < x, set low to mid +1 and go to Step 2

Step 5: if M[mid] > x, set high to mid -1 and go to Step 2

Step 6: M[mid] equals x and the algorithm terminates

Binary Search Algorithm

Binary Search Algorithm/* * Binary Search Algorithm */int lookup (struct entry M[], char x[], int n){ int low = 0; /* Step 1: */ int high = n - 1; /* Step 1: */ int mid; int result; while (low <= high) { mid = (low + high) / 2; /* Step 3: */ result = compare_strings (M[mid].word, x); if (result == -1) low = mid + 1; /* Step 4: */ else if (result == 1) high = mid - 1; /* Step 5: */ else return (mid); /* Step 6: */ } return (-1); /* Step 2: */}

Examples of Binary Search

Examples of Binary Search

Examples of Binary Search

Program 10.10/* * Dictionary lookup program */#include <stdio.h>

/* One dictionary record: word holds the entry's word and definition
   its meaning; each array must also hold the terminating null. */
struct entry{ char word[10]; char definition[50];};

/* Program 10.10 (continued) */

/*
 * Function to compare two character strings
 *
 * Returns -1 when s1 sorts before s2, 0 when the strings are equal,
 * and +1 when s1 sorts after s2.  The first differing characters are
 * compared as unsigned char values, so the ordering does not depend
 * on whether plain char is signed on the target platform.
 */
int compare_strings (const char s1[], const char s2[])
{
    int i = 0;
    unsigned char c1;
    unsigned char c2;

    /* skip the common prefix; matching characters end together */
    while (s1[i] == s2[i] && s1[i] != '\0')
        i++;

    c1 = (unsigned char) s1[i];
    c2 = (unsigned char) s2[i];

    if (c1 < c2)            /* s1 < s2 */
        return (-1);
    else if (c1 == c2)      /* s1 == s2 */
        return (0);
    else                    /* s1 > s2 */
        return (+1);
}

Program 10.10 (continued)/* * Function to lookup a word inside a dictionary */int lookup (struct entry dictionary[], char search [], int entries){ int low = 0; int high = entries - 1; int mid; int result; while (low <= high) { mid = (low + high) / 2; result = compare_strings (dictionary[mid].word, search); if (result == -1) low = mid + 1; else if (result == 1) high = mid - 1; else return (mid); /* found it */ } return (-1); /* not found */}

Program 10.10 (continued)main(){ struct entry dictionary[100] = { { "aardvark", "a burrowing African mammal" }, { "abyss", "a bottomless pit" }, { "acumen", "mentally sharp; keen" }, { "addle", "to become confused" }, { "aerie", "a high nest" }, { "affix", "to append; attach" }, { "agar", "a jelly made from seaweed" }, { "ahoy", "a nautical call of greeting" }, { "aigrette", "an ornamental cluster of feathers" }, { "ajar", "partially opened" } }; char word[10]; int entries = 10; int entry_number;

Program 10.10 (continued) printf("Enter word: "); scanf("%9s", word); entry_number = lookup (dictionary, word, entries); if (entry_number != -1) printf ("%s\n", dictionary[entry_number].definition); else printf ("Sorry, that word is not in my dictionary.\n"); system ("PAUSE");}

Program 10.10 Output

Program 10.10 Output (Rerun)

ASCII Character Table0 0 00 0000 0000 NUL (Null char.)1 1 01 0000 0001 SOH (Start of Header)2 2 02 0000 0010 STX (Start of Text)3 3 03 0000 0011 ETX (End of Text)4 4 04 0000 0100 EOT (End of Transmission)5 5 05 0000 0101 ENQ (Enquiry)6 6 06 0000 0110 ACK (Acknowledgment)7 7 07 0000 0111 BEL (Bell)8 10 08 0000 1000 BS (Backspace)9 11 09 0000 1001 HT (Horizontal Tab)10 12 0A 0000 1010 LF (Line Feed)11 13 0B 0000 1011 VT (Vertical Tab)12 14 0C 0000 1100 FF (Form Feed)13 15 0D 0000 1101 CR (Carriage Return)14 16 0E 0000 1110 SO (Shift Out)15 17 0F 0000 1111 SI (Shift In)16 20 10 0001 0000 DLE (Data Link Escape)17 21 11 0001 0001 DC1 (XON) (Device Control 1)18 22 12 0001 0010 DC2 (Device Control 2)19 23 13 0001 0011 DC3 (XOFF)(Device Control 3)20 24 14 0001 0100 DC4 (Device Control 4)21 25 15 0001 0101 NAK (Negativ Acknowledgemnt)22 26 16 0001 0110 SYN (Synchronous Idle)23 27 17 0001 0111 ETB (End of Trans. Block)24 30 18 0001 1000 CAN (Cancel)25 31 19 0001 1001 EM (End of Medium)26 32 1A 0001 1010 SUB (Substitute)27 33 1B 0001 1011 ESC (Escape)28 34 1C 0001 1100 FS (File Separator)29 35 1D 0001 1101 GS (Group Separator)30 36 1E 0001 1110 RS (Reqst to Send)(Rec. Sep.)31 37 1F 0001 1111 US (Unit Separator)32 40 20 0010 0000 SP (Space)33 41 21 0010 0001 ! (exclamation mark)34 42 22 0010 0010 " (double quote)35 43 23 0010 0011 # (number sign)36 44 24 0010 0100 $ (dollar sign)37 45 25 0010 0101 % (percent)38 46 26 0010 0110 & (ampersand)39 47 27 0010 0111 ' (single quote)40 50 28 0010 1000 ( (left/open parenthesis)41 51 29 0010 1001 ) (right/closing parenth.)42 52 2A 0010 1010 * (asterisk)43 53 2B 0010 1011 + (plus)44 54 2C 0010 1100 , (comma)45 55 2D 0010 1101 - (minus or dash)46 56 2E 0010 1110 . 
(dot)47 57 2F 0010 1111 / (forward slash)48 60 30 0011 0000 049 61 31 0011 0001 150 62 32 0011 0010 251 63 33 0011 0011 352 64 34 0011 0100 453 65 35 0011 0101 554 66 36 0011 0110 655 67 37 0011 0111 756 70 38 0011 1000 857 71 39 0011 1001 958 72 3A 0011 1010 : (colon)59 73 3B 0011 1011 ; (semi-colon)60 74 3C 0011 1100 < (less than)61 75 3D 0011 1101 = (equal sign)62 76 3E 0011 1110 > (greater than)63 77 3F 0011 1111 ? (question mark)

64 100 40 0100 0000 @ (AT symbol)65 101 41 0100 0001 A66 102 42 0100 0010 B67 103 43 0100 0011 C68 104 44 0100 0100 D69 105 45 0100 0101 E70 106 46 0100 0110 F71 107 47 0100 0111 G72 110 48 0100 1000 H73 111 49 0100 1001 I74 112 4A 0100 1010 J75 113 4B 0100 1011 K76 114 4C 0100 1100 L77 115 4D 0100 1101 M78 116 4E 0100 1110 N79 117 4F 0100 1111 O80 120 50 0101 0000 P81 121 51 0101 0001 Q82 122 52 0101 0010 R83 123 53 0101 0011 S84 124 54 0101 0100 T85 125 55 0101 0101 U86 126 56 0101 0110 V87 127 57 0101 0111 W88 130 58 0101 1000 X89 131 59 0101 1001 Y90 132 5A 0101 1010 Z91 133 5B 0101 1011 [ (left/opening bracket)92 134 5C 0101 1100 \ (back slash)93 135 5D 0101 1101 ] (right/closing bracket)94 136 5E 0101 1110 ^ (caret/circumflex)95 137 5F 0101 1111 _ (underscore)96 140 60 0110 0000 `97 141 61 0110 0001 a98 142 62 0110 0010 b99 143 63 0110 0011 c100 144 64 0110 0100 d101 145 65 0110 0101 e102 146 66 0110 0110 f103 147 67 0110 0111 g104 150 68 0110 1000 h105 151 69 0110 1001 i106 152 6A 0110 1010 j107 153 6B 0110 1011 k108 154 6C 0110 1100 l109 155 6D 0110 1101 m110 156 6E 0110 1110 n111 157 6F 0110 1111 o112 160 70 0111 0000 p113 161 71 0111 0001 q114 162 72 0111 0010 r115 163 73 0111 0011 s116 164 74 0111 0100 t117 165 75 0111 0101 u118 166 76 0111 0110 v119 167 77 0111 0111 w120 170 78 0111 1000 x121 171 79 0111 1001 y122 172 7A 0111 1010 z123 173 7B 0111 1011 { (left/opening brace)124 174 7C 0111 1100 | (vertical bar)125 175 7D 0111 1101 } (right/closing brace)126 176 7E 0111 1110 ~ (tilde)127 177 7F 0111 1111 DEL (delete)

ASCII Alphabetic Characters65 101 41 0100 0001 A66 102 42 0100 0010 B67 103 43 0100 0011 C68 104 44 0100 0100 D69 105 45 0100 0101 E70 106 46 0100 0110 F71 107 47 0100 0111 G72 110 48 0100 1000 H73 111 49 0100 1001 I74 112 4A 0100 1010 J75 113 4B 0100 1011 K76 114 4C 0100 1100 L77 115 4D 0100 1101 M78 116 4E 0100 1110 N79 117 4F 0100 1111 O80 120 50 0101 0000 P81 121 51 0101 0001 Q82 122 52 0101 0010 R83 123 53 0101 0011 S84 124 54 0101 0100 T85 125 55 0101 0101 U86 126 56 0101 0110 V87 127 57 0101 0111 W88 130 58 0101 1000 X89 131 59 0101 1001 Y90 132 5A 0101 1010 Z

97 141 61 0110 0001 a98 142 62 0110 0010 b99 143 63 0110 0011 c100 144 64 0110 0100 d101 145 65 0110 0101 e102 146 66 0110 0110 f103 147 67 0110 0111 g104 150 68 0110 1000 h105 151 69 0110 1001 i106 152 6A 0110 1010 j107 153 6B 0110 1011 k108 154 6C 0110 1100 l109 155 6D 0110 1101 m110 156 6E 0110 1110 n111 157 6F 0110 1111 o112 160 70 0111 0000 p113 161 71 0111 0001 q114 162 72 0111 0010 r115 163 73 0111 0011 s116 164 74 0111 0100 t117 165 75 0111 0101 u118 166 76 0111 0110 v119 167 77 0111 0111 w120 170 78 0111 1000 x121 171 79 0111 1001 y122 172 7A 0111 1010 z

Character Operations

Whenever a character constant or variable is used in an expression, it is automatically converted to, and subsequently treated as, an integer value. For example,

(‘a’ <= c) && (c <= ‘z’)

(97 <= c) && (c <= 122)

ASCII Numeric Characters

48 60 30 0011 0000 0

49 61 31 0011 0001 1

50 62 32 0011 0010 2

51 63 33 0011 0011 3

52 64 34 0011 0100 4

53 65 35 0011 0101 5

54 66 36 0011 0110 6

55 67 37 0011 0111 7

56 70 38 0011 1000 8

57 71 39 0011 1001 9

Character Operations

Suppose the character variable c contained one of the characters ‘0’ thru ‘9’ and that we wished to convert this value into the corresponding integer 0 thru 9. Since the digits of virtually all character sets are represented by sequential integer values, we can easily convert c into its integer equivalent by subtracting the character constant ‘0’ from it. For example,

char c = '5';
int i = c - '0';

c = 53;
i = 53 - 48;

Program 10.11/* * Function to convert a string to an integer */#include <stdio.h>

/*
 * Convert the leading run of decimal digits in string to an int.
 *
 * Conversion stops at the first character outside '0'..'9' (including
 * the terminating null); when the string does not begin with a digit
 * the result is 0.
 */
int string_to_integer (char string[])
{
    int total = 0;
    const char *digit = string;

    while (*digit >= '0' && *digit <= '9') {
        /* shift the value accumulated so far one decimal place left
           and add the new digit */
        total = total * 10 + (*digit - '0');
        ++digit;
    }

    return total;
}

/* Program 10.11 (continued) */

/*
 * Prints the results of converting three sample strings with
 * string_to_integer; the second result has 25 added to it.
 */
int main (void)
{
    printf ("%i\n", string_to_integer ("245"));
    printf ("%i\n", string_to_integer ("100") + 25);
    printf ("%i\n", string_to_integer ("13x5"));

    system ("PAUSE");
    return 0;
}

Program 10.11 Output