Skip to content

Commit

Permalink
Allow chain id to be assigned with chainid!() (#35)
Browse files Browse the repository at this point in the history
* Allow chain id to be assigned with chainid!()

Adds two functions for assigning the chain ID: chainid!(chain, new_id) and
chainid!(residue, new_id). For GitHub issue #22.

* Create a PDBConsistencyException

- Throw PDBConsistencyException when trying to change a chain's ID to an
ID that already exists.
- Throw PDBConsistencyException when moving a residue to a chain that
already has a residue with that number.
- Update the model's chain dictionary when changing a chain ID.

* Rename PDBConsistencyException to PDBConsistencyError

- Added a test ensuring that a KeyError is thrown when a chain is looked up
by its old ID after the chain ID has been changed to something else.
  • Loading branch information
gusennan authored Oct 26, 2021
1 parent d1fc832 commit 74a643a
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 4 deletions.
75 changes: 71 additions & 4 deletions src/model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export
ProteinStructure,
AtomRecord,
StructuralElementOrList,
PDBConsistencyError,
serial,
atomname,
altlocid,
Expand Down Expand Up @@ -46,6 +47,7 @@ export
resnames,
chain,
chainid,
chainid!,
resids,
residues,
model,
Expand Down Expand Up @@ -92,6 +94,7 @@ export
pairalign,
DataFrame


"A macromolecular structural element."
abstract type StructuralElement end

Expand All @@ -101,6 +104,14 @@ An atom that is part of a macromolecule - either an `Atom` or a
"""
abstract type AbstractAtom <: StructuralElement end

"""
Exception indicating something is inconsistent in the structure's
state.
"""
struct PDBConsistencyError <: Exception
    # Human-readable description of the inconsistency that was detected.
    msg::String
end

# Print the error as "PDBConsistencyError: <msg>" instead of the default
# constructor-style rendering, so the message reads cleanly in stack traces.
function Base.showerror(io::IO, err::PDBConsistencyError)
    return print(io, "PDBConsistencyError: ", err.msg)
end

"An atom that is part of a macromolecule."
struct Atom <: AbstractAtom
serial::Int
Expand All @@ -127,7 +138,7 @@ A residue (amino acid) or other molecule - either a `Residue` or a
abstract type AbstractResidue <: StructuralElement end

"A residue (amino acid) or other molecule."
struct Residue <: AbstractResidue
mutable struct Residue <: AbstractResidue
name::String
number::Int
ins_code::Char
Expand All @@ -147,7 +158,7 @@ struct DisorderedResidue <: AbstractResidue
end

"A chain (molecule) from a macromolecular structure."
struct Chain <: StructuralElement
mutable struct Chain <: StructuralElement
id::String # mmCIF files can have multi-character chain IDs
res_list::Vector{String}
residues::Dict{String, AbstractResidue}
Expand Down Expand Up @@ -816,6 +827,62 @@ Get the chain ID of an `AbstractAtom`, `AbstractResidue` or `Chain` as a
chainid(el::Union{AbstractResidue, AbstractAtom}) = chainid(chain(el))
chainid(ch::Chain) = ch.id

"""
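    chainid!(ch, id)

Set the chain ID of a `Chain` to a new `String`.
The chain is also re-keyed under the new ID in its model's chain dictionary.
Throws a `PDBConsistencyError` if another chain in the model already has this ID.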
"""
function chainid!(ch::Chain, id::String)
    # Renames `ch` in place and re-keys it in its model's `chains` dictionary.
    # Returns that dictionary (the value `delete!` hands back), as before.
    model_chains = ch.model.chains
    old_id = ch.id

    # Re-assigning the ID the chain already has is not a conflict with another
    # chain, so leave everything untouched instead of raising an error.
    id == old_id && return model_chains

    if haskey(model_chains, id)
        throw(PDBConsistencyError("Invalid ID ($(id)). The model already has a chain with this ID."))
    end

    ch.id = id
    # Register under the new key before dropping the old one so the chain is never
    # missing from the model.
    model_chains[id] = ch
    return delete!(model_chains, old_id)
end

"""
    chainid!(res, id)

Set the chain ID of an `AbstractResidue` to a new `String`.
If a chain with this ID already exists, the residue will be removed from its
current chain and added to that chain. If a chain with this ID does not exist, a
new chain will be added to the model and this residue will be added to it. If
moving this residue from a chain to a new chain renders the old chain without
residues, the old chain will be removed from the `Model`.
Throws a `PDBConsistencyError` if the target chain already has a different
residue stored under the same residue ID.
"""
function chainid!(res::AbstractResidue, id::String)
    # Moves `res` into the chain called `id` of the same model, creating that chain
    # when it does not exist and dropping the old chain when it becomes empty, then
    # rebuilds the ordered lists with `fixlists!` and returns its result.
    # NOTE(review): this reads and assigns `res.chain` directly - confirm that every
    # `AbstractResidue` subtype passed here has a mutable `chain` field.
    current_chain = res.chain
    model_chains = current_chain.model.chains

    # Find the key the residue is actually stored under, which may not have been
    # created from the `resid` function. Fall back to `resid(res)` when the residue
    # is not registered in its chain, so `nothing` is never used as a key.
    stored_key = findfirst(isequal(res), current_chain.residues)
    current_resid = stored_key === nothing ? resid(res) : stored_key

    # Moving a residue to the chain it already belongs to must not touch the
    # dictionaries: re-inserting and then deleting the same key would remove the
    # residue (and possibly the whole chain) from the model.
    if id != current_chain.id
        if haskey(model_chains, id)
            target_chain = model_chains[id]
            if haskey(target_chain.residues, current_resid) &&
                    target_chain.residues[current_resid] != res
                throw(PDBConsistencyError("A residue with id ($(current_resid)) already exists in chain $(id). Cannot copy this residue there"))
            end

            # Insert under the same key that was checked for a collision above, so
            # an unrelated residue in the target chain is never overwritten.
            target_chain.residues[current_resid] = res
        else
            model_chains[id] = Chain(
                id,
                String[],
                Dict{String, AbstractResidue}(current_resid => res),
                current_chain.model,
            )
        end
        res.chain = model_chains[id]

        # Remove the residue from its previous chain, and the chain itself once it
        # has no residues left.
        delete!(current_chain.residues, current_resid)
        if isempty(current_chain.residues)
            delete!(model_chains, current_chain.id)
        end
    end

    return fixlists!(structure(res))
end


"""
resids(ch)
Expand Down Expand Up @@ -1522,7 +1589,7 @@ fullresname(res::Residue) = res.name
function fixlists!(struc::ProteinStructure)
for mod in struc
for ch in mod
append!(ch.res_list, resid.(sort(collect(values(residues(ch))))))
ch.res_list = resid.(sort(collect(values(residues(ch)))))
for res in ch
if isa(res, Residue)
fixlists!(res)
Expand All @@ -1537,7 +1604,7 @@ function fixlists!(struc::ProteinStructure)
end

function fixlists!(res::Residue)
    # Rebuild the ordered atom-name list from the residue's current atoms. The list
    # is replaced outright rather than extended with `append!`, so calling this again
    # after the atoms change does not leave stale or duplicated names behind.
    sorted_names = fullatomname.(sort(collect(values(atoms(res)))))
    res.atom_list = sorted_names
    return sorted_names
end

# The full atom name of a plain `Atom` is its stored `name` field.
function fullatomname(at::Atom)
    return at.name
end
Expand Down
35 changes: 35 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,41 @@ end
@test chainid(dis_res) == "A"
@test chainid(ch) == "A"

# test modifying the chain ids
chainid!(ch, "C")
@test chainid(at) == "C"
@test chainid(dis_at) == "C"
@test chainid(res) == "C"
@test chainid(dis_res) == "C"
@test chainid(ch) == "C"
@test mod["C"] == ch
@test_throws KeyError mod["A"]

@test chainids(mod) == ["B", "C"]
chainid!(ch, "A")

# move one of the residues to a new chain and StructuralElements below it
# should identify on the new chain
chainid!(res, "C")
@test chainid(res) == "C"
@test chainid(at) == "C"
@test chainid(dis_at) == "C"
@test chainid(dis_res) == "A"
@test chainid(ch) == "A"

@test chainids(mod) == ["A", "B", "C"]

# Emptying a chain of residues by moving its residues deletes the chain
chainid!(res, "A")

@test chainids(mod) == ["A", "B"]

# reassigning a chainid to one that already exists throws an exception
@test_throws PDBConsistencyError chainid!(ch, "B")
struc['B'][10] = Residue("ALA", 10, ' ', false, struc['B'])
# reassigning a residue with a number to a chain that already has one of that number throws
@test_throws PDBConsistencyError chainid!(struc['B'][10], "A")

@test resids(ch) == ["10", "H_20A"]

@test isa(residues(ch), Dict{String, AbstractResidue})
Expand Down

0 comments on commit 74a643a

Please sign in to comment.