#ifndef WONKY_STRING_C
#define WONKY_STRING_C WONKY_STRING_C
#include <wonky_string.h>
struct wonky_str wonky_string_make()
{
struct wonky_str ret;
ret.cs=wonky_arr_alloc(1,1);
if(!wonky_arr_oom(ret.cs))
{
ret.cs[0]='\0';
ret.number_of_glyphs=1;/*\0 is a glyph*/
}else
{
ret.cs=NULL;
ret.number_of_glyphs=0;
}
return ret;
}
struct wonky_str wonky_string_copy(const struct wonky_str str)
{
struct wonky_str ret=(struct wonky_str){.cs=wonky_arr_copy(str.cs),.number_of_glyphs=str.number_of_glyphs};
wonky_string_recount_glyphs(&ret);
return ret;
}
struct wonky_str wonky_string_from_cstr(const char *str)
{
struct wonky_str ret;
if(str)
{
size_t str_len;
str_len=gstrlen(str);
ret.cs=wonky_arr_alloc(str_len+1,1);
ret.number_of_glyphs=1;
if(!wonky_arr_oom(ret.cs))
gmemmove(ret.cs,str,str_len+1);
wonky_string_recount_glyphs(&ret);
return ret;
}else
{
ret.cs=NULL;
ret.number_of_glyphs=0;
}
return ret;
}
/*size of the underlying array as reported by wonky_arr_size, 0 when wonky_arr_oom flags the buffer*/
size_t wonky_string_number_of_bytes(const struct wonky_str str)
{
	return wonky_arr_oom(str.cs) ? 0 : wonky_arr_size(str.cs);
}
/*
walks the string glyph by glyph, stores the number of steps in str->number_of_glyphs and returns it
the walk ends when the position reaches the number of bytes or stops moving
*/
size_t wonky_string_recount_glyphs(struct wonky_str *str)
{
	size_t counted=0;
	size_t current=0;
	size_t previous=1;/*differs from current so the first comparison passes*/

	while(previous!=current && current<wonky_string_number_of_bytes(*str))
	{
		++counted;
		previous=current;
		current=wonky_string_following_glyph_position(*str,current);
	}
	str->number_of_glyphs=counted;
	return counted;
}
/*returns the glyph count cached in the string, nothing is recounted here*/
size_t wonky_string_length(const struct wonky_str str)
{
	const size_t cached_glyph_count=str.number_of_glyphs;
	return cached_glyph_count;
}
/*tells whether wonky_arr_oom flags the buffer of str*/
_Bool wonky_string_oom(struct wonky_str str)
{
	if(wonky_arr_oom(str.cs))
		return 1;
	return 0;
}
/*returns the (unicode) value of the glyph_index'th glyph, indices start at 0*/
uint32_t wonky_string_glyph(const struct wonky_str str,size_t glyph_index)
{
	size_t starting_byte;
	/*first find the byte the glyph starts at, then decode the glyph found there*/
	starting_byte=wonky_string_glyph_position(str,glyph_index);
	return wonky_string_glyph_at_position(str,starting_byte);
}
/*
returns the index of the starting byte of the utf8 encoded glyph_index'th glyph, indices start at 0
when the string holds fewer glyphs the walk ends at the position
wonky_string_following_glyph_position reports once the bytes are used up
a string without bytes always gives 0
*/
size_t wonky_string_glyph_position(const struct wonky_str str,size_t glyph_index)
{
	const size_t number_of_bytes=wonky_string_number_of_bytes(str);
	size_t byte_index=0;
	/*
	walk glyphs till we reach glyph_index or run out of bytes
	once the bytes are used up another step can not move the position anymore,
	so stopping there spares up to glyph_index pointless iterations
	*/
	for(size_t i=0;i<glyph_index && byte_index<number_of_bytes;++i)
		byte_index=wonky_string_following_glyph_position(str,byte_index);
	return byte_index;
}
/*
returns the index of the byte after the first valid glyph found at or after byte_index
bytes that do not start a valid glyph are stepped over one at a time
if that leaves the string the result is wonky_arr_last_index(str.cs)+1
*/
size_t wonky_string_following_glyph_position(const struct wonky_str str,size_t byte_index)
{
	const size_t number_of_bytes=wonky_string_number_of_bytes(str);
	short step=0;

	/*skip invalid bytes, step stays 0 if we run out of bytes first*/
	while(byte_index<number_of_bytes)
	{
		step=wonky__utf8_glyph_size(str,byte_index);
		if(step!=0)
			break;
		++byte_index;
	}
	byte_index+=step;

	if(byte_index<number_of_bytes)
		return byte_index;
	return wonky_arr_last_index(str.cs)+1;
}
/*
returns glyph value of the utf8 encoded glyph starting at the byte_index'th byte of string
returns 0 if byte_index lies outside of the string or
if the utf8 encoded glyph at the byte_index'th byte is invalid
*/
uint32_t wonky_string_glyph_at_position(const struct wonky_str str,size_t byte_index)
{
	short glyph_size;
	/*
	wonky_string_glyph_position can hand back the position one past the last byte
	when asked for a glyph the string does not have, there is nothing to read there
	this also covers strings without a buffer since they report 0 bytes
	*/
	if(byte_index>=wonky_string_number_of_bytes(str))
		return 0;
	glyph_size=wonky__utf8_glyph_size(str,byte_index);
	if(glyph_size==0)
		return 0;
	return wonky__utf8_glyph_value(str,byte_index,glyph_size);
}
/*
appends the utf8 encoding of ch right in front of the terminating '\0' and bumps the glyph count
returns 1 on success
returns 0 if str is NULL, the string is already out of memory, ch can not be encoded
or the buffer could not be grown
*/
_Bool wonky_string_push_codepoint(struct wonky_str *str,uint32_t ch)
{
	short codepoint_size;
	/*same guard as wonky_string_push_byte*/
	if(str==NULL || wonky_arr_oom(str->cs))
		return 0;
	codepoint_size=wonky__utf8_codepoint_size(ch);
	/*size 0 means ch is not encodable, the encoder would shift by a negative amount*/
	if(codepoint_size==0)
		return 0;
	str->cs=wonky_arr_expand(str->cs,codepoint_size);
	if(wonky_arr_oom(str->cs))
		return 0;
	/*the new last byte becomes the terminator, the encoded bytes go right before it*/
	str->cs[wonky_arr_last_index(str->cs)]='\0';
	wonky__utf8_encode_codepoint(str->cs+wonky_arr_last_index(str->cs)-codepoint_size,ch,codepoint_size);
	++str->number_of_glyphs;
	return 1;
}
/*
appends the raw byte ch in front of the terminating '\0'
the glyph count is left alone
returns 1 on success, 0 if str is NULL or the buffer is flagged by wonky_arr_oom before or after growing
*/
_Bool wonky_string_push_byte(struct wonky_str *str,unsigned char ch)
{
	size_t terminator_index;

	if(str==NULL)
		return 0;
	if(wonky_arr_oom(str->cs))
		return 0;

	str->cs=wonky_arr_expand(str->cs,1);
	if(wonky_arr_oom(str->cs))
		return 0;

	terminator_index=wonky_arr_last_index(str->cs);
	str->cs[terminator_index]='\0';
	str->cs[terminator_index-1]=ch;
	return 1;
}
/*
appends the bytes of the NUL terminated right to str, re-terminates str and recounts the glyphs
returns 1 on success
returns 0 if an argument is NULL, str is already out of memory or the buffer could not be grown
*/
_Bool wonky_string_append(struct wonky_str *str,const char *right)
{
	size_t right_size;
	/*same guard as wonky_string_push_byte*/
	if(str==NULL || right==NULL || wonky_arr_oom(str->cs))
		return 0;
	right_size=gstrlen(right);
	str->cs=wonky_arr_expand(str->cs,right_size);
	/*a failed expand leaves no buffer to copy into*/
	if(wonky_arr_oom(str->cs))
		return 0;
	/*the old terminator sits right_size bytes before the new last byte, right overwrites from there*/
	gmemmove(str->cs+wonky_arr_last_index(str->cs)-right_size,right,right_size);
	str->cs[wonky_arr_last_index(str->cs)]='\0';
	wonky_string_recount_glyphs(str);
	return 1;
}
/*
puts the bytes of the NUL terminated left in front of the content of str and recounts the glyphs
returns 1 on success
returns 0 if an argument is NULL, str is already out of memory or the buffer could not be grown
*/
_Bool wonky_string_preppend(struct wonky_str *str,char *left)
{
	size_t left_size;
	/*same guard as wonky_string_push_byte*/
	if(str==NULL || left==NULL || wonky_arr_oom(str->cs))
		return 0;
	left_size=gstrlen(left);
	str->cs=wonky_arr_expand(str->cs,left_size);
	/*a failed expand leaves no buffer to shift*/
	if(wonky_arr_oom(str->cs))
		return 0;
	/*shift the old content (terminator included) to the right to open a gap of left_size bytes*/
	gmemmove(str->cs+left_size,str->cs,wonky_string_number_of_bytes(*str)-left_size);
	gmemmove(str->cs,left,left_size);
	wonky_string_recount_glyphs(str);
	return 1;
}
/*
inserts the bytes of the NUL terminated infix in front of the glyph_index'th glyph and recounts the glyphs
the index of the terminating '\0' is accepted, which appends
returns 1 on success
returns 0 if an argument is NULL, str is out of memory, glyph_index is not a glyph of str
or the buffer could not be grown
*/
_Bool wonky_string_insert(struct wonky_str *str,char *infix,size_t glyph_index)
{
	size_t infix_size;
	size_t where;
	/*
	compare with >= instead of >length-1:
	the length is unsigned, length-1 wraps around for a string without glyphs and lets every index pass
	*/
	if(str==NULL || infix==NULL || wonky_arr_oom(str->cs) || glyph_index>=wonky_string_length(*str))
		return 0;
	infix_size=gstrlen(infix);
	where=wonky_string_glyph_position(*str,glyph_index);
	str->cs=wonky_arr_expand(str->cs,infix_size);
	/*a failed expand leaves no buffer to shift*/
	if(wonky_arr_oom(str->cs))
		return 0;
	/*move the old tail starting at where to the right to open a gap of infix_size bytes*/
	gmemmove(str->cs+where+infix_size,str->cs+where,wonky_string_number_of_bytes(*str)-where-infix_size);
	gmemmove(str->cs+where,infix,infix_size);
	wonky_string_recount_glyphs(str);
	return 1;
}
/*
removes number_of_glyphs glyphs starting with the starting_glyph_index'th one and recounts the glyphs
returns 1 on success
returns 0 if str is NULL or the end of the range is byte 0 or lies outside of the string
NOTE(review): a range reaching the end of the string removes the terminating '\0' too - confirm callers expect that
NOTE(review): the result of wonky_arr_shrink is not stored, unlike wonky_arr_expand - confirm it works in place
*/
_Bool wonky_string_delete_chars(struct wonky_str *str,size_t starting_glyph_index,size_t number_of_glyphs)
{
	size_t number_of_bytes;
	size_t start_offset;
	size_t offset;
	if(str==NULL)
		return 0;
	number_of_bytes=wonky_string_number_of_bytes(*str);
	start_offset=wonky_string_glyph_position(*str,starting_glyph_index);
	offset=start_offset;
	/*find the byte after the last glyph to delete, past the last byte there is nothing left to step over*/
	for(size_t i=0;i<number_of_glyphs && offset<number_of_bytes;++i)
		offset=wonky_string_following_glyph_position(*str,offset);
	if(offset==0 || offset>number_of_bytes)
		return 0;
	/*
	only the bytes from offset up to the end have to move down
	adding start_offset to that count made the copy run start_offset bytes past the buffer
	*/
	gmemmove(str->cs+start_offset,str->cs+offset,number_of_bytes-offset);
	wonky_arr_shrink(str->cs,offset-start_offset);
	wonky_string_recount_glyphs(str);
	return 1;
}
/*two strings are equal when they have the same number of bytes and gmemcmp finds no difference in them*/
_Bool wonky_string_eqal(const struct wonky_str a,const struct wonky_str b)
{
	const size_t bytes_in_a=wonky_string_number_of_bytes(a);

	if(bytes_in_a==wonky_string_number_of_bytes(b))
		return gmemcmp(a.cs,b.cs,bytes_in_a)==0;
	return 0;
}
/*
compares a against the c string b over all bytes of a (terminator included) with gstrn_cmp
returns 1 if gstrn_cmp reports no difference
returns 0 otherwise, also when b is NULL or a is out of memory
*/
_Bool wonky_string_cequal(const struct wonky_str a,const char *b)
{
	/*
	the other functions taking a c string reject NULL, do the same instead of handing it to gstrn_cmp
	a string without a buffer has 0 bytes to compare - presumably that would match every b, treat it as unequal
	*/
	if(b==NULL || wonky_arr_oom(a.cs))
		return 0;
	return !gstrn_cmp(a.cs,b,wonky_string_number_of_bytes(a));
}
/*
variadic front end of wonky_string_vprintf
packs the arguments following format into a va_list and returns what wonky_string_vprintf returns
*/
int wonky_string_printf(struct wonky_str *destination,const char *format,...)
{
	va_list arguments;
	va_start(arguments,format);
	const int result=wonky_string_vprintf(destination,format,arguments);
	va_end(arguments);
	return result;
}
/*
writes format to the end of destination through a string stream
every '%' directive is parsed by wonky__parse_scan_format and emitted by wonky__from_scanformat,
all other bytes are written as they are
the function always returns 0, a NULL format or destination makes it do nothing
NOTE(review): args is handed by value to wonky__from_scanformat once per directive,
this relies on the callee's va_arg calls being visible here - confirm for the supported targets
*/
int wonky_string_vprintf(struct wonky_str *destination,const char *format,va_list args)
{
	if(format==NULL || destination==NULL)
		return 0;
	struct wonky__scanformat fmt;
	struct wonky_stream s=wonky_string_stream(destination);
	short indentation=0;
	/*output is appended, so start at the end of the string*/
	wonky_fseek(&s,0,SEEK_END);
	for(size_t i=0;format[i]!='\0';)
	{
		if(format[i]=='%')
		{
			wonky__parse_scan_format(format+i,&fmt);
			wonky__from_scanformat(&fmt,args,&s,&indentation);
			/*the parser tells how many bytes of format the directive took*/
			i+=fmt.forward_crawl;
		}else
		{
			/*offset the char pointer first and cast afterwards, arithmetic on void* is only a compiler extension*/
			wonky_write(&s,(void*)(format+i),1);
			++i;
		}
	}
	wonky_stream_delete(&s);
	return 0;
}
/*
returns how many bytes a utf8 encoded glyph takes judging by its leading byte alone (1 to 6)
returns 0 for bytes that can not start a glyph (continuation bytes 10xxxxxx, 0xFE and 0xFF)
*/
short wonky__utf8_get_glyph_size_from_starting_codepoint(unsigned char leading_byte)
{
	/*0x00-0x7F are the single byte glyphs, 0x7F included*/
	if(leading_byte<0x80)
		return 1;
	/*== binds tighter than &, without the parentheses the mask is applied to the result of the comparison*/
	else if((leading_byte&0xE0)==0xC0)
		return 2;
	else if((leading_byte&0xF0)==0xE0)
		return 3;
	else if((leading_byte&0xF8)==0xF0)
		return 4;
	else if((leading_byte&0xFC)==0xF8)
		return 5;
	else if((leading_byte&0xFE)==0xFC)
		return 6;
	else
		return 0;
}
/*
returns the size in bytes (1 to 6) of the utf8 encoded glyph starting at the byte_index'th byte
returns 0 if byte_index is outside of the string, the byte can not start a glyph,
the glyph would not end before the last byte of the string
or one of the bytes following the leading byte is not a continuation byte (10xxxxxx)
single byte glyphs are accepted anywhere in the string, the terminating '\0' included
*/
short wonky__utf8_glyph_size(struct wonky_str str,size_t byte_index)
{
	const size_t number_of_bytes=wonky_string_number_of_bytes(str);
	unsigned char leading_byte;
	short glyph_size;

	/*nothing to look at, this also keeps us away from strings without a buffer*/
	if(byte_index>=number_of_bytes)
		return 0;

	/*the cast keeps the comparisons right should cs hold plain (possibly signed) chars*/
	leading_byte=(unsigned char)str.cs[byte_index];
	if(leading_byte<0x80) /*we were on top of an ascii char, 0x7F included*/
		return 1;
	else if((leading_byte&0xE0)==0xC0)
		glyph_size=2;
	else if((leading_byte&0xF0)==0xE0)
		glyph_size=3;
	else if((leading_byte&0xF8)==0xF0)
		glyph_size=4;
	else if((leading_byte&0xFC)==0xF8)
		glyph_size=5;
	else if((leading_byte&0xFE)==0xFC)
		glyph_size=6;
	else
		return 0;

	/*the same bound for every size: the whole glyph has to end before the last byte of the string*/
	if(byte_index+glyph_size>=number_of_bytes)
		return 0;
	for(short i=1;i<glyph_size;++i)
	{
		if((str.cs[byte_index+i]&0xC0)!=0x80)
			return 0;
	}
	return glyph_size;
}
/*
decodes the glyph_size bytes starting at byte_index into one value
the leading byte is masked with 0xff>>glyph_size, every following byte adds its low 6 bits
no validation is done here
*/
uint32_t wonky__utf8_glyph_value(struct wonky_str str,size_t byte_index,short glyph_size)
{
	uint32_t value;
	short consumed=1;

	value=str.cs[byte_index]&(0xff>>glyph_size);
	while(consumed<glyph_size)
	{
		/*the shift frees the low 6 bits, so or-ing the payload in equals adding it*/
		value=(value<<6)|(0x3F&str.cs[byte_index+consumed]);
		++consumed;
	}
	return value;
}
/*
returns the number of bytes (1 to 6) needed to encode codepoint
returns 0 for codepoints above 0x7FFFFFFF
*/
short wonky__utf8_codepoint_size(uint32_t codepoint)
{
	/*largest codepoint that fits into 1,2,...,6 bytes*/
	static const uint32_t largest_codepoint[6]={0x7Fu,0x7FFu,0xFFFFu,0x1FFFFFu,0x3FFFFFFu,0x7FFFFFFFu};

	for(short size=1;size<=6;++size)
	{
		if(codepoint<=largest_codepoint[size-1])
			return size;
	}
	return 0;
}
/*
writes the size byte long utf8 encoding of codepoint to where
size 1 stores the codepoint as it is
bigger sizes store a leading byte with size high bits set followed by
continuation bytes carrying 6 bits each
size is expected to be the one matching codepoint, it is not checked
*/
void wonky__utf8_encode_codepoint(unsigned char *where,uint32_t codepoint,short size)
{
	uint32_t remaining=codepoint;

	if(size==1)
	{
		where[0]=codepoint;
		return;
	}
	/*fill the continuation bytes back to front, each one takes the lowest 6 bits left*/
	for(short i=size-1;i>0;--i)
	{
		where[i]=0x80|(remaining&0x3F);
		remaining>>=6;
	}
	/*storing into an unsigned char drops everything above the low 8 bits of the marker*/
	where[0]=(0xFF<<(8-size))|remaining;
}
struct wonky_stream wonky_string_stream(struct wonky_str *str)
{
struct wonky_string__stream_state *state=wonky_malloc(sizeof(struct wonky_string__stream_state));
if(state)
{
state->str=str;
state->where=0;
}
return (struct wonky_stream){
.read=wonky_string__stream_read,
.write=wonky_string__stream_write,
.fseek=wonky_string__stream_fseek,
.eof=wonky_string__stream_eof,
.type=WONKY_STREAM_TYPE_STRING,
.state=state
};
}
/*
does nothing at the moment
NOTE(review): the state taken from wonky_malloc in wonky_string_stream is not released here - confirm who frees it
*/
void wonky_string_stream_delete(struct wonky_stream *stream)
{
	/*currently not much point in freeing anything*/
	(void)stream;
}
/*
copies up to num_bytes bytes from the stream position into dst and advances the position
the terminating last byte of the string is never handed out
returns the number of bytes copied, -1 if state is NULL or the string is out of memory
*/
ssize_t wonky_string__stream_read(void *state,void *dst,size_t num_bytes)
{
	if(state==NULL)
		return -1;
	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;
	uint8_t *d=(uint8_t*)dst;
	size_t i;
	if(wonky_string_oom(*s->str))
		return -1;
	/*
	read at the stream position, not at the output index,
	otherwise every read would hand out the start of the string again
	*/
	for(i=0;s->where<wonky_arr_size(s->str->cs)-1 && i<num_bytes;++i,++s->where)
		d[i]=s->str->cs[s->where];
	return i;
}
/*
writes num_bytes bytes from src at the stream position and advances the position
bytes in front of the terminating last byte are overwritten,
what does not fit is appended with wonky_string_push_byte
the glyphs of the string are recounted afterwards
returns the number of bytes stored, -1 if state is NULL or the string is out of memory
*/
ssize_t wonky_string__stream_write(void *state,void *src,size_t num_bytes)
{
	if(state==NULL)
		return -1;
	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;
	uint8_t *sr=(uint8_t*)src;
	size_t i;
	if(wonky_string_oom(*s->str))
		return -1;
	/*
	overwrite at the stream position, not at the input index,
	otherwise a write after a seek would land at the start of the string
	*/
	for(i=0;s->where<wonky_arr_size(s->str->cs)-1 && i<num_bytes;++i,++s->where)
		s->str->cs[s->where]=sr[i];
	/*we are at the end of the string now, the rest grows it*/
	for(;i<num_bytes;++i,++s->where)
	{
		/*a byte that could not be pushed is not counted and does not move the position*/
		if(!wonky_string_push_byte(s->str,(unsigned char)sr[i]))
			break;
	}
	wonky_string_recount_glyphs(s->str);
	return i;
}
/*
moves the stream position
SEEK_SET: where bytes after the start
SEEK_CUR: where bytes after the current position
SEEK_END: where bytes before the end
the end is the index of the terminating last byte of the string
returns 1 if the requested position lies inside of the string (start and end included)
returns 0 otherwise, the position is then put at the end of the string
returns 0 without moving if state is NULL, the string is out of memory or whence is unknown
*/
_Bool wonky_string__stream_fseek(void *state,size_t where,int whence)
{
	if(state==NULL)
		return 0;
	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;
	size_t end;
	/*this is a failure, -1 would turn into 1 in a _Bool and read as success*/
	if(wonky_string_oom(*s->str))
		return 0;
	end=wonky_arr_size(s->str->cs)-1;
	/*
	the comparisons include the bound itself so that the exact end
	(and the start when counting back from the end) can be reached
	*/
	switch(whence)
	{
		case SEEK_SET:
			if(where<=end)
			{
				s->where=where;
				return 1;
			}
			break;
		case SEEK_CUR:
			/*the first test keeps end-s->where from wrapping should the string have shrunk under the stream*/
			if(s->where<=end && where<=end-s->where)
			{
				s->where+=where;
				return 1;
			}
			break;
		case SEEK_END:
			if(where<=end)
			{
				s->where=end-where;
				return 1;
			}
			break;
		default:
			return 0;
	}
	/*the request was out of range*/
	s->where=end;
	return 0;
}
/*
returns 1 if there is nothing left to read: the position reached the terminating last byte of the string,
state is NULL or the string is out of memory
returns 0 while wonky_string__stream_read can still hand out bytes
*/
_Bool wonky_string__stream_eof(void *state)
{
	if(state==NULL)
		return 1;
	struct wonky_string__stream_state *s=(struct wonky_string__stream_state*)state;
	/*a string without a buffer has nothing to read*/
	if(wonky_string_oom(*s->str))
		return 1;
	/*the read loop runs while where<size-1, end of file is the opposite of that*/
	return s->where>=wonky_arr_size(s->str->cs)-1;
}
#endif