sql - Converting JSON file into postgresql, avoid duplicates -


i working on set of scripts in postgresql put json files tables.

i know postgresql can parse json want statistics, join between distinct kinds of objects, ...

thanks several githubs, made sql function can give attributes within object. it's recursive, , it's handling tables :

    create or replace function listattribut1job(jobid integer)  returns void $body$ declare     result attributejob; begin recursive doc_key_and_value_recursive(key, value) (   select     t.key,     t.value   jobs, json_each(jobs.job) t jobs.id=jobid    union    select     concat(doc_key_and_value_recursive.key, $$__$$, t.key),     case         when json_typeof(t.value) = 'array'          json_array_elements(t.value)         else t.value     end   doc_key_and_value_recursive,     json_each(         case          when json_typeof(doc_key_and_value_recursive.value) <> 'object' '{}' :: json         else doc_key_and_value_recursive.value     end) t ) insert jobsattributefulllist(     select jobid,replace(key,'.','_'),value      --respect des noms dans pgsql : pas de . ni de -> dans les column names     doc_key_and_value_recursive     json_typeof(doc_key_and_value_recursive.value) <> 'object'     ); end $body$ language 'plpgsql' ; 

then have table :

objectid | attributename     | attributevalue ---------|-------------------|------------ 1        | somename          | somevalue 2        | somename(same)    | anothervalue 

which want. problem : how avoid duplicates when have arrays? example, if json :

 {"param":[               {"name":"parm1","value":"csr"},               {"name":"parm2","value":"c"},               {"name":"parm3","value":"ifrs"}]}  

my function give me :

id; attributename            | value 1|"job__params__param__name" |""parm3"" 1|"job__params__param__name" |""parm2"" 1|"job__params__param__name" |""parm1"" 1|"job__params__param__value"|""ifrs"" 1|"job__params__param__value"|""csr"" 1|"job__params__param__value"|""c"" 

i have

id; attributename            | value 1|"job__params__param3__name" |""parm3"" 1|"job__params__param2__name" |""parm2"" 1|"job__params__param1__name" |""parm1"" 1|"job__params__param3__value"|""ifrs"" 1|"job__params__param2__value"|""csr"" 1|"job__params__param1__value"|""c"" 

so tried things 'if attribute name in (select attribute ...) not work.

i using function above in context:

open jobcursor (select id jobsjson); --      loop         fetch jobcursor record_job;         exit when not found;         perform listattribut1job(record_job.jobid);     end loop; 

(my objects called jobs). of course, can modify context.

would know way 'if attribute in attributetable attribute:=attribute1' or attribute2 if attribute1 si used, or ... ?

thank much

ps: if want full code can give it, wanted post easy read.

ok,

i tried update client application i'm lame @ php created big script avoid duplicates. feel free upgrade , comment if know how make better:

create or replace function avoidduplicatejobs(jobid integer) returns void $body$ declare querystring varchar; querystringgroup varchar; newan varchar; dupscursor refcursor; dupsnamecursor refcursor; dupsr record; namer record; idr record; indice integer; begin     querystring :=('select attribid, jobid, attributename,value jobsattributefulllist jafl '     ||'where (select count(*) jobsattributefulllist jafl2 '     ||'where jobid='||jobid||' , '     ||'jafl.attributename = jafl2.attributename) >1 '     ||' , jobid='||jobid     ||' order attributename, attribid  ');     drop table if exists dups;     create temp table dups(attribid integer, jobid integer , attributename varchar(100) not null, value varchar);      --raise info 'query : %',querystring; --  insert dups (attribid, jobid, attributename,value)      open dupscursor execute (querystring);     --execute querystring dupscursor;     --open dupsnamecursor for(select attributename dupscursor group attributename);     loop          fetch dupscursor dupsr;         exit when not found;         --raise info 'dups = %' ,dupsr;         insert  dups (attribid, jobid, attributename,value) values (dupsr.attribid,dupsr.jobid, dupsr.attributename, dupsr.value) ;     end loop;     close dupscursor;     open dupsnamecursor (select distinct attributename dups);     loop         fetch dupsnamecursor namer;         exit when not found;         raise info 'attributename = %' ,namer;         indice:=0;         querystringgroup:='select attribid dups attributename ='||$$'$$||namer.attributename||$$'$$||' order attribid;';         open dupscursor execute querystringgroup;         loop             fetch dupscursor idr;             exit when not found;             raise info 'attribid =%',idr;             indice:=indice +1;             querystring:= ('select attributename dups attribid='||idr.attribid);             execute querystring newan;             newan:=newan||indice;             raise info 'newan = %', newan;             querystring:= $$update jobsattributefulllist set attributename='$$||newan||$$' attribid=$$||idr.attribid||' ;';             raise info 'update query = %',querystring;             execute querystring;         end loop;         close dupscursor;      end loop;     close dupsnamecursor;   end; $body$ language plpgsql; 

Comments