Added get_enums method for DDLm dictionaries. Implemented

line prefixing and folding recognition on input. Version bump.
jamesrhester · Jul 14, 2021 · c3310fb · c3310fb
1 parent e98a8e4
commit c3310fb
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 18 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "CrystalInfoFramework"
 uuid = "6007d9b0-c6b2-11e8-0510-1d10e825f3f1"
 authors = ["James.Hester <[email protected]>"]
-version = "0.4.2"
+version = "0.4.3"
 
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

diff --git a/src/cif2_transformer.jl b/src/cif2_transformer.jl
@@ -51,21 +51,52 @@ strip_string(ss::String) = begin
     return ss[2:end-1]
 end
 
+"""
+    unfold(sa)
+
+Join the lines in `sa` into a single string, undoing line folding. For any
+line whose last non-whitespace character is a backslash, that backslash and
+everything after it (trailing whitespace and line terminator) are dropped,
+so that the next line continues it. All other lines are copied unchanged.
+"""
+unfold(sa) = begin
+    final = IOBuffer()
+    for one_line in sa
+        line = String(one_line)
+        # A match starts at the folding backslash: only whitespace may follow it
+        m = match(r"\\\s*$",line)
+        if m === nothing
+            write(final,line)
+        else
+            # prevind rather than `offset-1`, so that a multi-byte character
+            # immediately before the backslash does not give an invalid index
+            write(final,line[1:prevind(line,m.offset)])
+        end
+    end
+    return String(take!(final))
+end
+
+"""
+    unprefix(sa,prefix)
+
+Return the lines in `sa` with the leading `prefix` removed from each one.
+
+An `ErrorException` listing the offending lines is thrown if any line does
+not start with `prefix`, including lines that are shorter than `prefix`.
+"""
+unprefix(sa,prefix) = begin
+    # startswith copes with lines shorter than the prefix and with
+    # multi-byte characters, unlike slicing by length(prefix)
+    bad = filter(x->!startswith(x,prefix),sa)
+    if !isempty(bad)
+        error("Line prefix '$prefix' missing from lines $bad")
+    end
+    # Strings are indexed by code unit, so skip the prefix's code units
+    skip = ncodeunits(prefix)
+    return map(x->x[skip+1:end],sa)
+end
+
 # We may have a \r\n combo in here so we have to
 # be a little careful. And the cr/lf at the end
 # of the last line is part of the delimiter
 @rule semi_string(t::TreeToCif,args) = begin
+    line_folding = false
+    prefix = ""
     all_chars = length(args[1])
     as_string = String(args[1])
-    no_semi = if as_string[2] == ';'
-        if all_chars > 2 as_string[3:end] else "" end
-    else
-        @assert as_string[3] == ';'
-        if all_chars > 3 as_string[4:end] else "" end
+    semi = findfirst(';',as_string)
+    no_semi = semi == all_chars ? "" : as_string[semi+1:end]
+    if length(no_semi) > 0 && match(r"\\\s*$",no_semi) !== nothing
+        no_semi = strip(no_semi)
+        line_folding = length(no_semi) == 1 || no_semi[end-1] == '\\'
+        if length(no_semi) > 1
+            prefix = no_semi[1:prevind(no_semi,findfirst('\\',no_semi))]
+        end
+        no_semi = ""
     end
     if length(args) == 2 final = no_semi
     else
-        final = no_semi*join(String.(args[2:end-1]))
+        if !line_folding && prefix == ""
+            final = no_semi*join(String.(args[2:end-1]))
+        else
+            final = no_semi*unfold(unprefix(args[2:end-1],prefix))
+        end
     end
     # chop off the very last line terminator if present
     if length(final)>1 && final[end-1:end] == "\r\n"

diff --git a/src/cif_output.jl b/src/cif_output.jl
@@ -540,8 +540,9 @@ end
     calc_ideal_spacing(colwidths)
 
 Calculate column start positions based on reported widths. Packets start at loop_align, with
-subsequent values aligned to loop align. Widths are (min,max) values, where min is the minimum
-possible width and max is the maximum possible width.  These are only different for compound
+subsequent values aligned to loop align. Widths are (lower,upper) named tuples, where lower
+is the minimum
+possible width and upper is the maximum possible width.  These are only different for compound
 data values.
 """
 calc_ideal_spacing(colwidths) = begin
@@ -550,7 +551,7 @@ calc_ideal_spacing(colwidths) = begin
     calc_widths = Int[] #locked-in widths
     old_p = 1
     line = 1
-    sumsofar = 0     #Current column
+    sumsofar = 0     #Final column so far without extra whitespace
     interim = Int[]  #values that may require addition of preceding whitespace
 
     # When starting a line we calculate the indent and then see if the
@@ -601,8 +602,10 @@ calc_ideal_spacing(colwidths) = begin
         if remainder < 0
             throw(error("Line overflow!"))
         end
-        push!(calc_starts,interim[1])
-        append!(calc_starts,interim[2:end] .+ remainder)
+        #println("Remainder is $remainder")
+        for i in 1:length(interim)
+            push!(calc_starts,interim[i] + remainder*(i-1))
+        end
         # adjust for two value rule
         #println("After remainder $interim $calc_starts")
         if length(interim) == 2 && interim[2] < value_col && calc_starts[end] > value_col
@@ -621,13 +624,13 @@ calc_ideal_spacing(colwidths) = begin
         end
         #println("Col $p: $sumsofar")
         if sumsofar + 2 + colwidths[p].upper <= line_length
-            push!(interim,sumsofar)
+            push!(interim,sumsofar+2)
             sumsofar += colwidths[p].upper + 2
             push!(calc_widths,colwidths[p].upper)
             continue
         end
         if sumsofar + 2 + colwidths[p].lower <= line_length
-            push!(interim,sumsofar)
+            push!(interim,sumsofar+2)
             push!(calc_widths,line_length - sumsofar - 2)
             sumsofar = line_length
             continue
@@ -766,13 +769,15 @@ show_set(io,cat,df;implicits=[],indents=[text_indent,value_col],order=(),
     pn = propertynames(df)
     colnames = length(order)>0 ? intersect(order,pn) : sort!(pn)
     leftindent = " "^indents[1]
+    # Add any missing key columns         
     for cl in colnames
         if cl in [:master_id,:__blockname,:__object_id] continue end
         if "$cat.$(String(cl))" in implicits continue end
         this_val = df[!,cl][]
         if ismissing(this_val) continue end
         if haskey(ddlm_defaults,(cat,cl)) && ddlm_defaults[(cat,cl)] == this_val
-            if !(cat == :type && cl in (:purpose,:source,:container,:contents)) continue
+            if !(cat == :type && cl in (:purpose,:source,:container,:contents)) &&
+                !(cat == :method && cl == :purpose) continue
             end
         end
         fullname = "_$cat.$cl"
@@ -887,7 +892,7 @@ to allow checking software easy access.
 const ddlm_toplevel_order = (:dictionary => (:title,:class,:version,:date,:uri,:ddl_conformance,
                                         :namespace),
                         :description => (:text,),
-                        :dictionary_valid => (:application,:attributes),
+                        :dictionary_valid => (:application,:scope,:option,:attributes),
                         :dictionary_audit => (:version,:date,:revision)
                         )
 
@@ -981,6 +986,7 @@ get_sorted_cats(d,cat) = begin
     cc = get_categories(d)
     catinfo = sort!([(c,get_parent_category(d,c)) for c in cc])
     filter!(x->x[1]!=x[2],catinfo)
+    println("Catinfo: $catinfo")
     sorted = recurse_sort(cat,catinfo)
     if length(sorted) != length(catinfo) - 1 #all except head
         orig = [x[1] for x in catinfo]

diff --git a/src/ddlm_dictionary_ng.jl b/src/ddlm_dictionary_ng.jl
@@ -38,6 +38,7 @@ export get_ultimate_link
 export get_default,lookup_default
 export get_dic_name
 export get_cat_class
+export get_enums          #get all enumerated lists
 export get_dic_namespace
 export is_category
 export find_head_category,add_head_category!
@@ -605,6 +606,24 @@ get_single_key_cats(d::DDLm_Dictionary) = begin
     end
 end
 
+"""
+    get_enums(d::DDLm_Dictionary)
+
+Return a `Dict` mapping each key `k` of `d` whose definition `d[k]` has a
+non-empty `:enumeration_set` table to the `state` column of that table,
+i.e. every item that takes enumerated values together with its list of values.
+"""
+get_enums(d::DDLm_Dictionary) = begin
+    res = Dict{String,Array{String,1}}()
+    for k in keys(d)
+        v = d[k]
+        # Skip definitions with no enumeration table, or an empty one
+        if haskey(v,:enumeration_set) && nrow(v[:enumeration_set])>0
+            res[k] = v[:enumeration_set].state
+        end
+    end
+    return res
+end
+
 """
     get_ultimate_link(d::DDLm_Dictionary,dataname::AbstractString)
 
@@ -1240,8 +1259,12 @@ is changed.
 """
 enter_defaults(d) = begin
     for ((tab,obj),val) in ddlm_defaults
-        if haskey(d,tab) && obj in propertynames(d[tab])
-            d[tab][!,obj] = coalesce.(d[tab][!,obj],val)
+        if haskey(d,tab)  # tables not present in `d` are left alone
+            if obj in propertynames(d[tab])
+                d[tab][!,obj] = coalesce.(d[tab][!,obj],val)  # column exists: fill only its missing entries with the default
+            else
+                insertcols!(d[tab],obj=>val)  # column absent: add it, holding the default (presumably one value per row - confirm `val` is never a vector)
+            end
         end
     end
 end