Skip to content

Commit

Permalink
Add import file caching, fix bug in output order.
Browse files Browse the repository at this point in the history
  • Loading branch information
James.Hester committed Nov 18, 2021
1 parent 6accdf9 commit e7b59ea
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "CrystalInfoFramework"
uuid = "6007d9b0-c6b2-11e8-0510-1d10e825f3f1"
authors = ["James.Hester <[email protected]>"]
version = "0.4.5"
version = "0.4.6"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Expand Down
8 changes: 7 additions & 1 deletion src/cif_output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1018,11 +1018,16 @@ recurse_sort(cat,l) = begin
return final
end

"""
Sort all of the names in `cat`, putting SU data names directly after
their primary data names.
"""
sort_item_names(d,cat) = begin
start_list = sort(get_names_in_cat(d,cat))
# now find any su values
sus = filter(start_list) do x
:purpose in propertynames(d[x][:type]) && d[x][:type].purpose[] == "SU"
direct = :purpose in propertynames(d[x][:type]) && d[x][:type].purpose[] == "SU"
direct || haskey(d[x],:import) && check_import_block(d,x,:type,:purpose,"SU")
end
links = map(x->(x,lowercase(d[x][:name].linked_item_id[])),sus)
for (s,l) in links
Expand Down Expand Up @@ -1096,3 +1101,4 @@ show(io::IO,::MIME"text/cif",ddl2_dic::DDL2_Dictionary) = begin
end
end
end

142 changes: 121 additions & 21 deletions src/ddlm_dictionary_ng.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export find_head_category,add_head_category!
export get_julia_type_name,get_dimensions
export conform_capitals
export add_definition!,add_definition #add new definitions
export check_import_block #inspect an import block
import Base.show

"""
Expand All @@ -59,6 +60,8 @@ struct DDLm_Dictionary <: AbstractCifDictionary
def_meths_text::Dict{Tuple,Expr}
namespace::String
header_comments::String
original_file::String #for looking at imports later
cached_imports::Dict{Any,DDLm_Dictionary}
end

"""
Expand All @@ -67,30 +70,42 @@ end
Create a `DDLm_Dictionary` from `c`. `ignore_imports = true` will
ignore any `import` attributes.
"""
DDLm_Dictionary(c::Cif;ignore_imports=false) = begin
DDLm_Dictionary(c::Cif;kwargs...) = begin
if length(keys(c))!= 1
error("Error: Cif dictionary has more than one data block")
end
return DDLm_Dictionary(first(c).second,ignore_imports=ignore_imports,header=get_header_comments(c))
return DDLm_Dictionary(first(c).second;header=get_header_comments(c),kwargs...)
end

"""
DDLm_Dictionary(a::AbstractPath;verbose=false,ignore_imports=false)
DDLm_Dictionary(a::AbstractPath;verbose=false,ignore_imports=false,cache_imports=true)
Create a `DDLm_Dictionary` given filename `a`. `verbose = true` will print
extra debugging information during reading. `ignore_imports = true` will ignore
any `import` attributes.
any `import` attributes. `cache_imports` will store the contents of imported
files (`Contents` mode only) but will not merge the contents into the
importing definition.
Setting `ignore_imports` to `false` (the default) merges all information in
imported files into the dictionary, replacing the `import` attribute.
By default imports are cached, even if they are not
merged. `cache_imports` can be set to `false` to completely ignore any
import attributes.
`cache_imports` is ignored if `ignore_imports` is `false`.
"""
DDLm_Dictionary(a::AbstractPath;verbose=false,ignore_imports=false) = begin
DDLm_Dictionary(a::AbstractPath;verbose=false,kwargs...) = begin
c = Cif(a,verbose=verbose,native=true) #Native to catch header comments
DDLm_Dictionary(c,ignore_imports=ignore_imports)
DDLm_Dictionary(c;kwargs...)
end

DDLm_Dictionary(a::String;verbose=false,ignore_imports=false) = begin
DDLm_Dictionary(Path(a),verbose=verbose,ignore_imports=ignore_imports)
DDLm_Dictionary(a::String;kwargs...) = begin
DDLm_Dictionary(Path(a);kwargs...)
end

DDLm_Dictionary(b::CifBlock;ignore_imports=false,header="") = begin
DDLm_Dictionary(b::CifBlock;ignore_imports=false,header="",cache_imports=true) = begin
all_dict_info = Dict{Symbol,DataFrame}()
# Namespace
nspace = get(b,"_dictionary.namespace",[""])[]
Expand Down Expand Up @@ -131,35 +146,40 @@ DDLm_Dictionary(b::CifBlock;ignore_imports=false,header="") = begin
new_vals = (b[x][] for x in dnames)
update_row!(all_dict_info,Dict(zip(dnames,new_vals)),CaselessString("master_id"),title)
end
# process imports - could we do this separately?
cache = Dict()
# process imports
if cache_imports || !ignore_imports
cache = import_cache(all_dict_info,b.original_file)
end
if !ignore_imports
resolve_imports!(all_dict_info,b.original_file)
resolve_imports!(all_dict_info,b.original_file,cache)
end
# Apply default values if not a template dictionary
# Apply default values if not a template dictionary
if all_dict_info[:dictionary][!,:class][] != "Template"
enter_defaults(all_dict_info)
end
if all_dict_info[:dictionary].class[] == "Reference"
extra_reference!(all_dict_info)
end
DDLm_Dictionary(all_dict_info,nspace,header=header)
DDLm_Dictionary(all_dict_info,nspace,header=header,origin=b.original_file,
imports=cache)
end

"""
DDLm_Dictionary(attr_dict::Dict{Symbol,DataFrame},nspace,header="")
DDLm_Dictionary(attr_dict::Dict{Symbol,DataFrame},nspace;header="",origin="",imports=Dict())
The symbol keys in `attr_dict` are DDLm attribute categories,
and the columns in the indexed `DataFrame`s are the object_ids
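of the DDLm attributes of that category. `header` contains optional comments
to be output at the top of the dictionary. `origin` (the originating file) and
`imports` (any cached imported dictionaries) are stored in the resulting
dictionary so that imports can be inspected later.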
"""
DDLm_Dictionary(attr_dict::Dict{Symbol,DataFrame},nspace;header="") = begin
DDLm_Dictionary(attr_dict::Dict{Symbol,DataFrame},nspace;header="",origin="",imports=Dict()) = begin
# group for efficiency
gdf = Dict{Symbol,GroupedDataFrame}()
for k in keys(attr_dict)
gdf[k] = groupby(attr_dict[k],:master_id)
end
DDLm_Dictionary(gdf,Dict(),Dict(),Dict(),Dict(),nspace,header)
DDLm_Dictionary(gdf,Dict(),Dict(),Dict(),Dict(),nspace,header,origin,imports)
end

"""
Expand Down Expand Up @@ -1066,13 +1086,53 @@ to_path(u::URI) = begin
end

"""
resolve_imports!(d::Dict{Symbol,DataFrame},original_file)
import_cache(d,original_file)
Return a dictionary, keyed by import location, of all import template files
read in as DDLm dictionaries ready for use. This routine is intended to save
time re-reading the imported files.
"""
import_cache(d,original_file) = begin
    # Maps each import location to the dictionary read from that location,
    # so that no imported file has to be parsed more than once.
    cached_dicts = Dict()
    if !haskey(d,:import) return cached_dicts end
    original_dir = dirname(original_file)
    for one_row in eachrow(d[:import])
        for one_entry in one_row.get
            (location,block,mode,if_dupl,if_miss) = get_import_info(original_dir,one_entry)
            # "Full" mode imports are done separately, and a location that
            # is already cached does not need to be read again.
            if mode == "Full" || location in keys(cached_dicts)
                continue
            end
            try
                cached_dicts[location] = DDLm_Dictionary(location)
            catch y
                # catch_backtrace() is the backtrace of the exception being
                # handled; backtrace() would only show this handler's stack.
                println("Error $y, backtrace $(catch_backtrace())")
                if if_miss == "Exit"
                    # `error` already throws, no wrapping `throw` is needed
                    error("Unable to find import for $location")
                end
                # Otherwise this import is left out of the cache
            end
        end
    end
    return cached_dicts
end

"""
resolve_imports!(d::Dict{Symbol,DataFrame},original_file,cache)
Replace all `_import.get` statements with the contents of the imported dictionary.
`cache` is a dictionary of pre-imported files keyed by their location.
"""
resolve_imports!(d::Dict{Symbol,DataFrame},original_file) = begin
resolve_imports!(d::Dict{Symbol,DataFrame},original_file,cache) = begin
if !haskey(d,:import) return d end
resolve_templated_imports!(d,original_file)
resolve_templated_imports!(d,original_file,cache)
new_c = resolve_full_imports!(d,original_file)
# remove all imports
delete!(d,:import)
Expand All @@ -1094,8 +1154,7 @@ get_import_info(original_dir,import_entry) = begin
return location,block,mode,if_dupl,if_miss
end

resolve_templated_imports!(d::Dict{Symbol,DataFrame},original_file) = begin
cached_dicts = Dict() # so as not to read twice
resolve_templated_imports!(d::Dict{Symbol,DataFrame},original_file,cached_dicts) = begin
original_dir = dirname(original_file)
for one_row in eachrow(d[:import])
import_table = one_row.get
Expand Down Expand Up @@ -1281,6 +1340,47 @@ resolve_full_imports!(d::Dict{Symbol,DataFrame},original_file) = begin
return d
end

"""
    check_import_block(d::DDLm_Dictionary,name,cat,obj,val)

Check if the definition for `name` contains attribute `obj` of category
`cat` equal to `val` within an import block. `cat` and `obj` are symbols
(e.g. `:type` and `:purpose`) and `val` should be a single value.

Returns `false` if the definition does not have exactly one import
statement containing exactly one import, if the import mode is `Full`,
if the imported file is not found in the cached imports of `d`, or if
the attribute is absent or unset in the imported block.
"""
check_import_block(d::DDLm_Dictionary,name,cat,obj,val) = begin
    x = d[name]
    if !haskey(x,:import) || nrow(x[:import])!=1 return false end
    specs = x[:import].get[]
    # Nothing to inspect if the import list is empty
    if isempty(specs) return false end
    if length(specs) > 1
        println("Warning: cannot introspect multiple imports")
        return false
    end
    spec = first(specs)
    if get(spec,"mode","Contents") == "Full" return false end
    # Cached imports are looked up by the location of the imported file
    templ_file_name = joinpath(Path(dirname(d.original_file)),spec["file"])
    if !(templ_file_name in keys(d.cached_imports))
        println("Warning: cannot find $templ_file_name when checking imports for $name")
        return false
    end
    templates = d.cached_imports[templ_file_name]
    target_block = templates[spec["save"]]
    # Find what we care about
    if haskey(target_block,cat)
        df = target_block[cat]
        if obj in propertynames(df)
            v = df[:,obj]
            # isequal always yields a Bool: an unset (`missing`) attribute
            # compares as not equal instead of propagating `missing`
            return length(v) == 1 && isequal(v[],val)
        end
    end
    return false
end

"""
    check_import_block(d::DDLm_Dictionary,name,attribute,val)

As for the five-argument method, but with the attribute given as a
single string of the form `_category.object` (e.g. `"_type.purpose"`).
"""
check_import_block(d::DDLm_Dictionary,name,attribute,val) = begin
    category_part,object_part = split(attribute,".")
    # The first character of the category part (the leading underscore
    # in e.g. "_type") is dropped before conversion to a symbol
    check_import_block(d,name,Symbol(category_part[2:end]),Symbol(object_part),val)
end

"""
Default values for DDLm attributes
"""
Expand Down
7 changes: 7 additions & 0 deletions test/dictionaries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ prepare_system() = begin
t = DDLm_Dictionary(joinpath(@__PATH__,"cif_mag.dic"))
end

@testset "Introspecting imports" begin
    # Imports are left unmerged so that the import blocks can be inspected
    unmerged = DDLm_Dictionary(joinpath(@__PATH__,"cif_mag.dic"),ignore_imports=true)
    target = "_atom_site_rotation.label"
    # An attribute value supplied through the import block is found...
    @test check_import_block(unmerged,target,:name,:linked_item_id,"_atom_site.label")
    # ...and a value that is not supplied there is not
    @test !check_import_block(unmerged,target,:type,:purpose,"Junk")
end

@testset "DDLm_Dictionaries" begin
t = prepare_system()
@test "_audit_conform.dict_name" in get_names_in_cat(t,"audit_conform")
Expand Down Expand Up @@ -81,6 +87,7 @@ end
@test get_parent_category(ud,"structure") == "magnetic"
end


@testset "DDLm reference dictionaries" begin
t = DDLm_Dictionary(joinpath(@__PATH__,"ddl.dic"))
@test "_definition.master_id" in keys(t)
Expand Down

0 comments on commit e7b59ea

Please sign in to comment.