mirror of
				https://gitlab.freedesktop.org/wlroots/wlroots.git
				synced 2025-11-03 09:01:40 -05:00 
			
		
		
		
	
		
			
	
	
		
			67 lines
		
	
	
	
		
			1.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			67 lines
		
	
	
	
		
			1.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								#include <stdint.h>
							 | 
						||
| 
								 | 
							
								#include "util/utf8.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Tests whether the byte x, reinterpreted as an unsigned 8-bit value,
								 * lies within the inclusive range [low, high]. The cast makes the
								 * comparison independent of whether plain char is signed. */
								static bool in_range(char x, uint8_t low, uint8_t high) {
									const uint8_t byte = (uint8_t)x;
									if (byte < low) {
										return false;
									}
									return byte <= high;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Checks whether a NUL-terminated byte string is well-formed UTF-8.
								 *
								 * The string is scanned up to its first NUL byte. The pointer is
								 * dereferenced unconditionally, so it must not be NULL. An empty string
								 * is accepted.
								 *
								 * Returns true if the whole string consists of byte sequences matching
								 * the patterns listed below, false otherwise. */
								bool is_utf8(const char *string) {
									/* Returns true iff the string is 'well-formed', as defined by
									 * Unicode Standard 15.0.0. See Chapter 3, D92 and Table 3.7.
									 *
									 * UTF-8 strings are sequences of code points encoded in one of the
									 * following ways. The first byte determines the pattern.
									 *
									 * 00..7F
									 * C2..DF 80..BF
									 * E0     A0..BF 80..BF
									 * E1..EC 80..BF 80..BF
									 * ED     80..9F 80..BF
									 * EE..EF 80..BF 80..BF
									 * F0     90..BF 80..BF 80..BF
									 * F1..F3 80..BF 80..BF 80..BF
									 * F4     80..8F 80..BF 80..BF
									 */

									/* One row per pattern above. Entries 2*j and 2*j+1 of a row are the
									 * inclusive lower and upper bound for byte j of the sequence; entries
									 * beyond the pattern's length are unused. The tables are read-only,
									 * so they are static const instead of being rebuilt on the stack on
									 * every call. */
									static const uint8_t range_table[9][8] = {
										{0x00, 0x7F},
										{0xC2, 0xDF, 0x80, 0xBF},
										{0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF},
										{0xE1, 0xEC, 0x80, 0xBF, 0x80, 0xBF},
										{0xED, 0xED, 0x80, 0x9F, 0x80, 0xBF},
										{0xEE, 0xEF, 0x80, 0xBF, 0x80, 0xBF},
										{0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF},
										{0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF},
										{0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF},
									};
									/* Number of bytes in the sequence described by each row. */
									static const int lengths[9] = {
										1, 2, 3, 3, 3, 3, 4, 4, 4
									};

									while (string[0]) {
										bool accept = false;
										for (int i = 0; i < 9; i++) {
											/* Find the row whose first-byte range contains the
											 * current byte. */
											if (!in_range(string[0], range_table[i][0],
													range_table[i][1])) {
												continue;
											}
											for (int j = 1; j < lengths[i]; j++) {
												if (!in_range(string[j], range_table[i][2 * j],
														range_table[i][2 * j + 1])) {
													// Early exit is necessary to avoid
													// reading past the null terminator:
													// every continuation range starts at
													// 0x80 or above, so a NUL byte fails
													// here before any later byte is read.
													// The first-byte ranges do not overlap,
													// so no other row could match either.
													return false;
												}
											}
											/* Whole sequence matched; move to the next one. */
											string += lengths[i];
											accept = true;
											break;
										}
										if (!accept) {
											/* The leading byte fits none of the patterns. */
											return false;
										}
									}

									return true;
								}
							 |