/*
 * WONKY
 *
 * LOG | FILES | OVERVIEW
 */


#ifndef WONKY_STRING_C
#define WONKY_STRING_C WONKY_STRING_C
#include <wonky_string.h>

struct wonky_str wonky_string_make()
{
	struct wonky_str ret;
	ret.cs=wonky_arr_alloc(1,1);
	if(!wonky_arr_oom(ret.cs))
	{
		ret.cs[0]='\0';
		ret.number_of_glyphs=1;/*\0 is a glyph*/
	}else
	{
		ret.cs=NULL;
		ret.number_of_glyphs=0;
	}
	return ret;
}
struct wonky_str wonky_string_copy(const struct wonky_str str)
{
	struct wonky_str ret=(struct wonky_str){.cs=wonky_arr_copy(str.cs),.number_of_glyphs=str.number_of_glyphs};
	wonky_string_recount_glyphs(&ret);
	return ret;
}
struct wonky_str wonky_string_from_cstr(const char *str)
{
	struct wonky_str ret;
	if(str)
	{
		size_t str_len;
		str_len=gstrlen(str);
		ret.cs=wonky_arr_alloc(str_len+1,1);
		ret.number_of_glyphs=1;
		if(!wonky_arr_oom(ret.cs))
			gmemmove(ret.cs,str,str_len+1);
		wonky_string_recount_glyphs(&ret);
		return ret;
	}else
	{
		ret.cs=NULL;
		ret.number_of_glyphs=0;
	}
	return ret;
}
/*
 returns wonky_arr_size of the byte array, or 0 when the array is reported as oom
 */
size_t wonky_string_number_of_bytes(const struct wonky_str str)
{
	if(wonky_arr_oom(str.cs))
		return 0;
	return wonky_arr_size(str.cs);
}
/*
 recomputes str->number_of_glyphs by hopping from byte 0 with
 wonky_string_following_glyph_position and counting one glyph per hop
 the walk ends when the position reaches the number of bytes or stops advancing
 returns the new count
 */
size_t wonky_string_recount_glyphs(struct wonky_str *str)
{
	size_t total=0;
	size_t cursor=0;
	size_t previous=1;/*anything different from cursor, so the first comparison passes*/

	while(previous!=cursor && cursor<wonky_string_number_of_bytes(*str))
	{
		++total;
		previous=cursor;
		cursor=wonky_string_following_glyph_position(*str,cursor);
	}

	str->number_of_glyphs=total;
	return total;
}
/*returns the stored glyph count of str; nothing is recounted here*/
size_t wonky_string_length(const struct wonky_str str)
{
	size_t glyphs=str.number_of_glyphs;

	return glyphs;
}
/*returns 1 when wonky_arr_oom reports the byte array of str as out of memory, 0 otherwise*/
_Bool wonky_string_oom(struct wonky_str str)
{
	if(wonky_arr_oom(str.cs))
		return 1;
	return 0;
}

/*returns the unicode value of the glyph_index'th glyph, indices starting from 0*/
uint32_t wonky_string_glyph(const struct wonky_str str,size_t glyph_index)
{
	/*first find the byte the glyph starts at, then decode from that byte*/
	size_t starting_byte=wonky_string_glyph_position(str,glyph_index);

	return wonky_string_glyph_at_position(str,starting_byte);
}
/*
 returns the byte index reached after stepping glyph_index glyphs forward from byte 0
 every step is one call to wonky_string_following_glyph_position
 */
size_t wonky_string_glyph_position(const struct wonky_str str,size_t glyph_index)
{
	size_t position=0;
	size_t steps_left=glyph_index;

	/*always takes exactly glyph_index steps, there is no early exit at the end of the string*/
	while(steps_left>0)
	{
		position=wonky_string_following_glyph_position(str,position);
		--steps_left;
	}
	return position;
}
/*
 returns the byte index just past the first glyph found at or after byte_index
 bytes for which wonky__utf8_glyph_size reports 0 are skipped before that glyph
 if the result is not below the number of bytes, wonky_arr_last_index(str.cs)+1 is returned instead
 */
size_t wonky_string_following_glyph_position(const struct wonky_str str,size_t byte_index)
{
	short step=0;

	/*skip invalid bytes, stop on the first byte that starts a glyph*/
	for(;byte_index<wonky_string_number_of_bytes(str);++byte_index)
	{
		step=wonky__utf8_glyph_size(str,byte_index);
		if(step!=0)
			break;
	}

	/*step is still 0 here when the bytes ran out before a glyph was found*/
	byte_index+=step;
	if(byte_index<wonky_string_number_of_bytes(str))
		return byte_index;
	return wonky_arr_last_index(str.cs)+1;
}
/*
 returns glyph value of the utf8 encoded glyph starting at the byte_index'th byte of string
 returns 0 if the glyph starting there is invalid
 returns 0 as well if byte_index is not below the number of bytes of the string
 */
uint32_t wonky_string_glyph_at_position(const struct wonky_str str,size_t byte_index)
{
	short glyph_size;

	/*
	 wonky_string_glyph_position may hand back an index that is not inside the array
	 when it is asked for a glyph past the end; nothing may be read from such an index
	 wonky_string_number_of_bytes is 0 for an oom string, so that case is rejected here too
	 */
	if(byte_index>=wonky_string_number_of_bytes(str))
		return 0;

	glyph_size=wonky__utf8_glyph_size(str,byte_index);
	if(glyph_size==0)
		return 0;
	else
		return wonky__utf8_glyph_value(str,byte_index,glyph_size);
}

/*
 appends the utf8 encoding of codepoint ch to the string and keeps '\0' as the last byte
 returns 1 on success and increments the glyph count
 returns 0 and appends nothing if str is NULL, the string is already oom,
 ch has no encoding (wonky__utf8_codepoint_size gives 0) or growing the array fails
 */
_Bool wonky_string_push_codepoint(struct wonky_str *str,uint32_t ch)
{
	short codepoint_size;

	/*same entry checks as wonky_string_push_byte*/
	if(str==NULL || wonky_arr_oom(str->cs))
		return 0;

	codepoint_size=wonky__utf8_codepoint_size(ch);
	/*with size 0 the array would not grow and the encoder would write over the terminator*/
	if(codepoint_size==0)
		return 0;

	str->cs=wonky_arr_expand(str->cs,codepoint_size);
	if(wonky_arr_oom(str->cs))
		return 0;

	/*new terminator goes in the last slot, the encoded bytes start where the old terminator was*/
	str->cs[wonky_arr_last_index(str->cs)]='\0';
	wonky__utf8_encode_codepoint(str->cs+wonky_arr_last_index(str->cs)-codepoint_size,ch,codepoint_size);

	++str->number_of_glyphs;
	return 1;
}
/*
 appends the raw byte ch to the string and keeps '\0' as the last byte
 number_of_glyphs is not touched here
 returns 1 on success, 0 if str is NULL, the string is oom or growing the array fails
 */
_Bool wonky_string_push_byte(struct wonky_str *str,unsigned char ch)
{
	if(str==NULL)
		return 0;
	if(wonky_arr_oom(str->cs))
		return 0;

	str->cs=wonky_arr_expand(str->cs,1);
	if(wonky_arr_oom(str->cs))
		return 0;

	/*terminator moves to the new last slot, ch takes the slot before it*/
	str->cs[wonky_arr_last_index(str->cs)]='\0';
	str->cs[wonky_arr_last_index(str->cs)-1]=ch;
	return 1;
}
/*
 appends the bytes of the c string right (without its terminator) to the end of str,
 writes a fresh '\0' in the last slot and recounts the glyphs
 returns 1 on success
 returns 0 if str or right is NULL, if the string is already oom or if growing the array fails;
 in those cases nothing is written through str->cs
 */
_Bool wonky_string_append(struct wonky_str *str,const char *right)
{
	if(str==NULL || right==NULL || wonky_arr_oom(str->cs))
		return 0;

	size_t right_size;
	right_size=gstrlen(right);
	str->cs=wonky_arr_expand(str->cs,right_size);
	/*the array must not be written to when the expansion failed*/
	if(wonky_arr_oom(str->cs))
		return 0;

	/*right starts where the old terminator was, right_size bytes before the new last slot*/
	gmemmove(str->cs+wonky_arr_last_index(str->cs)-right_size,right,right_size);
	str->cs[wonky_arr_last_index(str->cs)]='\0';

	wonky_string_recount_glyphs(str);
	return 1;
}
/*
 puts the bytes of the c string left (without its terminator) in front of str
 and recounts the glyphs
 returns 1 on success
 returns 0 if str or left is NULL, if the string is already oom or if growing the array fails;
 in those cases nothing is written through str->cs
 */
_Bool wonky_string_preppend(struct wonky_str *str,char *left)
{
	if(str==NULL || left==NULL || wonky_arr_oom(str->cs))
		return 0;

	size_t left_size;
	left_size=gstrlen(left);
	str->cs=wonky_arr_expand(str->cs,left_size);
	/*the array must not be touched when the expansion failed*/
	if(wonky_arr_oom(str->cs))
		return 0;

	/*
	 shift the old contents, terminator included, left_size bytes to the right
	 (this relies on wonky_arr_expand keeping the old bytes at the front and
	 growing the array by left_size bytes - TODO confirm), then fill the gap
	 */
	gmemmove(str->cs+left_size,str->cs,wonky_string_number_of_bytes(*str)-left_size);
	gmemmove(str->cs,left,left_size);

	wonky_string_recount_glyphs(str);
	return 1;
}
/*
 inserts the bytes of the c string infix (without its terminator) in front of the
 glyph_index'th glyph of str and recounts the glyphs
 the index of the terminating '\0' glyph is accepted, which inserts at the end
 returns 1 on success
 returns 0 if str or infix is NULL, if the string is oom, if glyph_index is not
 below the glyph count or if growing the array fails
 */
_Bool wonky_string_insert(struct wonky_str *str,char *infix,size_t glyph_index)
{
	if(str==NULL || infix==NULL || wonky_arr_oom(str->cs))
		return 0;
	/*written as >= so that a glyph count of 0 can not wrap around the way length-1 would*/
	if(glyph_index>=wonky_string_length(*str))
		return 0;

	size_t infix_size;
	size_t where;
	infix_size=gstrlen(infix);
	where=wonky_string_glyph_position(*str,glyph_index);
	str->cs=wonky_arr_expand(str->cs,infix_size);
	/*the array must not be touched when the expansion failed*/
	if(wonky_arr_oom(str->cs))
		return 0;

	/*move everything from where onwards (old size minus where bytes) out of the way, then fill the gap*/
	gmemmove(str->cs+where+infix_size,str->cs+where,wonky_string_number_of_bytes(*str)-where-infix_size);
	gmemmove(str->cs+where,infix,infix_size);

	wonky_string_recount_glyphs(str);
	return 1;
}
/*
 removes number_of_glyphs glyphs starting with the starting_glyph_index'th glyph
 and recounts the glyphs
 returns 1 on success
 returns 0 if str is NULL, the string is oom, or the end of the deleted range
 is byte 0 or lies beyond the number of bytes
 NOTE(review): a range that reaches the end of the array also removes the terminating '\0' - confirm callers never ask for that
 NOTE(review): the result of wonky_arr_shrink is not stored, unlike wonky_arr_expand - confirm it works in place
 */
_Bool wonky_string_delete_chars(struct wonky_str *str,size_t starting_glyph_index,size_t number_of_glyphs)
{
	if(str==NULL || wonky_arr_oom(str->cs))
		return 0;

	size_t total_bytes=wonky_string_number_of_bytes(*str);
	size_t start_offset=wonky_string_glyph_position(*str,starting_glyph_index);
	size_t offset=start_offset;

	/*walk offset forward to the first byte that survives the deletion*/
	for(size_t i=0;i<number_of_glyphs;++i,offset=wonky_string_following_glyph_position(*str,offset));
	if(offset==0 || offset>total_bytes)
		return 0;

	/*only the total_bytes-offset bytes behind the deleted range exist to be moved down*/
	gmemmove(str->cs+start_offset,str->cs+offset,total_bytes-offset);
	wonky_arr_shrink(str->cs,offset-start_offset);
	wonky_string_recount_glyphs(str);
	return 1;
}

/*
 returns 1 when a and b have the same number of bytes and gmemcmp
 reports no difference over that many bytes, 0 otherwise
 */
_Bool wonky_string_eqal(const struct wonky_str a,const struct wonky_str b)
{
	size_t a_size=wonky_string_number_of_bytes(a);

	if(a_size!=wonky_string_number_of_bytes(b))
		return 0;
	return gmemcmp(a.cs,b.cs,a_size)==0;
}
/*
 compares the string a with the c string b over all bytes of a, terminator included
 returns 1 when gstrn_cmp reports no difference, 0 otherwise
 returns 0 when b is NULL or when a has no bytes (wonky_string_number_of_bytes
 is 0 for an oom string); a comparison over 0 bytes would presumably match
 every c string - NOTE(review): confirm against gstrn_cmp
 */
_Bool wonky_string_cequal(const struct wonky_str a,const char *b)
{
	size_t a_size=wonky_string_number_of_bytes(a);

	if(b==NULL || a_size==0)
		return 0;
	return !gstrn_cmp(a.cs,b,a_size);
}


/*
 variadic front end of wonky_string_vprintf
 packs the arguments behind format into a va_list, forwards everything and
 returns whatever wonky_string_vprintf returned
 */
int wonky_string_printf(struct wonky_str *destination,const char *format,...)
{
	va_list variadic;

	va_start(variadic,format);
	int status=wonky_string_vprintf(destination,format,variadic);
	va_end(variadic);

	return status;
}
/*
 appends formatted output to destination
 a stream over destination is opened and moved to its end, then format is walked:
 every '%' is parsed with wonky__parse_scan_format and rendered with wonky__from_scanformat,
 every other byte is written to the stream as it is
 returns 0 in every case, including the early exit for a NULL format or destination
 NOTE(review): the loop only advances past a '%' by fmt.forward_crawl - confirm that is never 0
 NOTE(review): args is handed by value to wonky__from_scanformat on every '%';
 ISO C leaves a va_list indeterminate after the callee uses va_arg on it - confirm for the targets in use
 */
int wonky_string_vprintf(struct wonky_str *destination,const char *format,va_list args)
{
	if(format==NULL || destination==NULL)
		return 0;

	struct wonky__scanformat fmt;
	struct wonky_stream s=wonky_string_stream(destination);
	short indentation=0;
	wonky_fseek(&s,0,SEEK_END);

	for(size_t i=0;format[i]!='\0';)
	{
		if(format[i]=='%')
		{
			wonky__parse_scan_format(format+i,&fmt);
			wonky__from_scanformat(&fmt,args,&s,&indentation);
			i+=fmt.forward_crawl;
		}else
		{
			/*do the arithmetic on the char pointer, arithmetic on void* is not standard c*/
			wonky_write(&s,(void*)(format+i),1);
			++i;
		}
	}
	wonky_stream_delete(&s);

	return 0;
}

/*
 returns how many bytes a utf8 encoded glyph occupies judging by its first byte alone
 1 for 0x00-0x7F, 2-6 for the lead bytes 110xxxxx up to 1111110x
 0 for a byte that can not start a glyph (continuation bytes 10xxxxxx, 0xFE, 0xFF)
 */
short wonky__utf8_get_glyph_size_from_starting_codepoint(unsigned char leading_byte)
{
	/*== binds tighter than &, so every mask test needs its own parentheses*/
	if(leading_byte<0x80)
		return 1;
	else if((leading_byte&0xE0)==0xC0)
		return 2;
	else if((leading_byte&0xF0)==0xE0)
		return 3;
	else if((leading_byte&0xF8)==0xF0)
		return 4;
	else if((leading_byte&0xFC)==0xF8)
		return 5;
	else if((leading_byte&0xFE)==0xFC)
		return 6;
	else
		return 0;
}
/*
 returns the size in bytes (1-6) of the utf8 encoded glyph starting at byte_index
 returns 0 when
  - byte_index is not below the number of bytes of the string (also covers an oom string)
  - the byte at byte_index can not start a glyph
  - the glyph would not end before the last byte of the array
  - one of the following bytes is not a continuation byte (10xxxxxx)
 */
short wonky__utf8_glyph_size(struct wonky_str str,size_t byte_index)
{
	size_t total_bytes=wonky_string_number_of_bytes(str);
	unsigned char lead;
	short size;

	/*nothing may be read outside the array*/
	if(byte_index>=total_bytes)
		return 0;

	/*looked at as unsigned so the comparisons do not depend on the signedness of the element type*/
	lead=(unsigned char)str.cs[byte_index];

	if(lead<0x80) /*we were on top of an ascii char*/
		return 1;
	else if((lead&0xE0)==0xC0)
		size=2;
	else if((lead&0xF0)==0xE0)
		size=3;
	else if((lead&0xF8)==0xF0)
		size=4;
	else if((lead&0xFC)==0xF8)
		size=5;
	else if((lead&0xFE)==0xFC)
		size=6;
	else
		return 0;

	/*the whole glyph has to sit in front of the last byte of the array*/
	if(byte_index+size>=total_bytes)
		return 0;

	for(short i=1;i<size;++i)
	{
		if((str.cs[byte_index+i]&0xC0)!=0x80)
			return 0;
	}
	return size;
}
/*
 decodes the glyph of glyph_size bytes that starts at byte_index
 the first byte is masked with 0xff>>glyph_size, every further byte
 contributes its low 6 bits, most significant group first
 no validation is done here
 */
uint32_t wonky__utf8_glyph_value(struct wonky_str str,size_t byte_index,short glyph_size)
{
	uint32_t value=str.cs[byte_index]&(0xff>>glyph_size);
	short consumed=1;

	while(consumed<glyph_size)
	{
		value=(value<<6)+(0x3F&str.cs[byte_index+consumed]);
		++consumed;
	}
	return value;
}
/*
 returns how many bytes the utf8 encoding of codepoint takes (1-6)
 returns 0 for values above 0x7FFFFFFF
 */
short wonky__utf8_codepoint_size(uint32_t codepoint)
{
	/*largest codepoint that still fits into 1,2,...,6 bytes*/
	static const uint32_t upper_bound[6]={0x7Fu,0x7FFu,0xFFFFu,0x1FFFFFu,0x3FFFFFFu,0x7FFFFFFFu};

	for(short n=0;n<6;++n)
	{
		if(codepoint<=upper_bound[n])
			return n+1;
	}
	return 0;
}
/*
 writes the utf8 encoding of codepoint into where[0..size-1]
 size 1 stores the codepoint as it is
 otherwise the first byte gets size leading one bits plus the top bits of the codepoint
 and every further byte is 10xxxxxx with the next 6 bits
 no terminator is written and size is not checked against codepoint
 */
void wonky__utf8_encode_codepoint(unsigned char *where,uint32_t codepoint,short size)
{
	if(size==1)
	{
		*where=codepoint;
		return;
	}

	/*how far the bits belonging to the current byte are from bit 0*/
	int shift=(size-1)*6;

	where[0]=(0xFF<<(8-size))|(codepoint>>shift);
	for(short k=1;k<size;++k)
	{
		shift-=6;
		where[k]=0x80|((codepoint>>shift)&0x3F);
	}
}

struct wonky_stream wonky_string_stream(struct wonky_str *str)
{
	struct wonky_string__stream_state *state=wonky_malloc(sizeof(struct wonky_string__stream_state));

	if(state)
	{
		state->str=str;
		state->where=0;
	}
	return (struct wonky_stream){
		.read=wonky_string__stream_read,
		.write=wonky_string__stream_write,
		.fseek=wonky_string__stream_fseek,
		.eof=wonky_string__stream_eof,
		.type=WONKY_STREAM_TYPE_STRING,
		.state=state
	};			
}
/*
 does nothing: stream is neither read nor changed and nothing is released
 */
void wonky_string_stream_delete(struct wonky_stream *stream)
{
	/*currently not much point in freeing anything*/
	(void)stream;
}
/*
 read callback of a string stream
 copies up to num_bytes bytes from the current stream position into dst and
 advances the position by the number of bytes copied
 the last byte of the array (the terminating '\0') is never handed out
 returns the number of bytes copied
 returns -1 if state is NULL, the stream has no string, the string is oom,
 or dst is NULL while bytes were asked for
 */
ssize_t wonky_string__stream_read(void *state,void *dst,size_t num_bytes)
{
	if(state==NULL)
		return -1;
	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;
	uint8_t *d=(uint8_t*)dst;
	size_t i;

	if(s->str==NULL || wonky_string_oom(*s->str))
		return -1;
	if(d==NULL && num_bytes!=0)
		return -1;

	/*the source index is the stream position, i only counts what went into dst*/
	for(i=0;s->where<wonky_arr_size(s->str->cs)-1 && i<num_bytes;++i,++s->where)
		d[i]=s->str->cs[s->where];

	return i;
}
/*
 write callback of a string stream
 bytes of src first overwrite the string from the current stream position up to,
 but not including, the last byte of the array; whatever is left is appended
 with wonky_string_push_byte
 the position advances by one for every byte written and the glyphs are recounted afterwards
 returns the number of bytes actually written, which is less than num_bytes
 when a push fails
 returns -1 if state is NULL, the stream has no string, the string is oom,
 or src is NULL while bytes were given
 */
ssize_t wonky_string__stream_write(void *state,void *src,size_t num_bytes)
{
	if(state==NULL)
		return -1;
	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;
	uint8_t *sr=(uint8_t*)src;
	size_t i;

	if(s->str==NULL || wonky_string_oom(*s->str))
		return -1;
	if(sr==NULL && num_bytes!=0)
		return -1;

	/*the destination index is the stream position, i only counts what was taken from src*/
	for(i=0;s->where<wonky_arr_size(s->str->cs)-1 && i<num_bytes;++i,++s->where)
		s->str->cs[s->where]=sr[i];

	/*append the rest; a byte whose push failed is neither counted nor stepped over*/
	while(i<num_bytes)
	{
		if(!wonky_string_push_byte(s->str,(unsigned char)sr[i]))
			break;
		++i;
		++s->where;
	}
	wonky_string_recount_glyphs(s->str);

	return i;
}
/*
 seek callback of a string stream
 positions run from 0 to end, where end is the index of the last byte of the array
 (the terminating '\0')
  SEEK_SET: position becomes where
  SEEK_CUR: position moves where bytes forward
  SEEK_END: position becomes end-where
 returns 1 when the target lies in 0..end
 otherwise the position is parked at end and 0 is returned
 returns 0 without moving if state is NULL, the stream has no string,
 the string is oom or whence is unknown
 */
_Bool wonky_string__stream_fseek(void *state,size_t where,int whence)
{
	if(state==NULL)
		return 0;

	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;

	/*the return type is _Bool, so failure has to be 0: -1 would convert to 1*/
	if(s->str==NULL || wonky_string_oom(*s->str))
		return 0;

	size_t end=wonky_arr_size(s->str->cs)-1;

	switch(whence)
	{
		case SEEK_SET:
			if(where<=end)
			{
				s->where=where;
				return 1;
			}
			break;
		case SEEK_CUR:
			/*first test keeps end-s->where from wrapping if the position is already past end*/
			if(s->where<=end && where<=end-s->where)
			{
				s->where+=where;
				return 1;
			}
			break;
		case SEEK_END:
			if(where<=end)
			{
				s->where=end-where;
				return 1;
			}
			break;
		default:
			return 0;
	}

	/*target out of range*/
	s->where=end;
	return 0;
}
/*
 eof callback of a string stream
 returns 1 when nothing is left to read: the position has reached the last byte
 of the array (the terminating '\0'), state is NULL, the stream has no string
 or the string is oom
 returns 0 while there are bytes left in front of the terminator
 */
_Bool wonky_string__stream_eof(void *state)
{
	if(state==NULL)
		return 1;

	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;

	if(s->str==NULL || wonky_string_oom(*s->str))
		return 1;

	/*the read callback hands out bytes only while where<size-1, eof is the opposite*/
	return s->where>=wonky_arr_size(s->str->cs)-1;
}
#endif