mirror of https://github.com/pjreddie/darknet.git
This commit is contained in:
parent
f6afaabcdf
commit
d38a64807f
12
LICENSE
12
LICENSE
|
@@ -1,12 +0,0 @@
|
|||
YOLO LICENSE
|
||||
Version 2, July 29 2016
|
||||
|
||||
THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER
|
||||
SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN
|
||||
TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES
|
||||
LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY. NOW HERE'S
|
||||
THE REAL LICENSE:
|
||||
|
||||
0. Darknet is public domain.
|
||||
1. Do whatever you want with it.
|
||||
2. Stop emailing me about it!
|
13
LICENSE.fuck
13
LICENSE.fuck
|
@@ -1,13 +0,0 @@
|
|||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
Version 2, December 2004
|
||||
|
||||
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document, and changing it is allowed as long
|
||||
as the name is changed.
|
||||
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
91
LICENSE.gen
91
LICENSE.gen
|
@@ -1,91 +0,0 @@
|
|||
RNN LICENSE Version 3, June 21 2017
|
||||
|
||||
Copyright (c) 1990, 1989, 1999 Free87337 May 48 THIRD PARTIES OR ANY OTHER THE
|
||||
COMPLAIN OR CONSEQUENTIAL DAMAGES AND REGARDLESS OF WHETHER IN CONTRACT, TO THE
|
||||
EXTENT REPAIR OR AGENTS (NOT THE IN ANY EVENT). THE SOFTWARE WILL BE
|
||||
UNINTERRUPTED OR ERROR-FREE OR ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF ALL THE WORK (GOVERNED CODE) HIM RESPONSES, OR OF FINES,
|
||||
SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR ANY OTHER OR OTHER HARL UNDER NO
|
||||
CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE),
|
||||
PATENT PERMITTED BY THE INSTAGRAM PARENT STATE OR TORT (INCLUDING NEGLIGENCE),
|
||||
PRODUCT LIABILITY OR OTHERWISE, ARISING OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR ANYTHING PROVIDED IN THIS PRODUCT, COMMIS AND SERVICES
|
||||
ARE LICENSED SOFTWARE AND ANY RESULE OR ANY OTHER THE COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY SPECIAL, INCIDENTAL, CASE, SUCH WARRANTIES, EXPRESS OR IMPLIED,
|
||||
INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COPYRIGHT HOLDERS AND/OR ANY
|
||||
PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY
|
||||
EXPRESS OR DISTRIBUTE THAT ALL CLAIMS ARE SHALL CREATE DERAVE BE LIABLE TO YOU
|
||||
WILL HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
6. TERMINATION. TO THE EXTENT PERMITTED BY LAW, NO USE OF THE COVERED CODE IS
|
||||
WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE
|
||||
INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY
|
||||
SERVICING, REPAIR OR COULT OR IN ANY WAY OUT OF THE USE OF THE WEBSITES OR
|
||||
SERVICE WILL BE CONSEQUENTIAL DAMAGES OF ANY KIND HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
|
||||
This paragraph Agreement constitutes the entire agreement between the parties
|
||||
with respect to the Work licensed here. However, if you place the name of the
|
||||
fact that the arbitration was the consultation of the parties as a "patent is".
|
||||
Subject to the terms and conditions of this License, Contributor has knowledge
|
||||
that a license under a third party may also be used to endorse or promote
|
||||
products derived from the Work, and there is no warranty on the Software and
|
||||
Science Fees. For the purposes of this Agreement, attach the following
|
||||
disclaimers (without liabilities of written notice to the Subject Software) in a
|
||||
manner that a product is under common control with you. The Free Software
|
||||
Foundation may publish revised and/or new versions of the License for the
|
||||
Modifications made by the applicable terms. The Recipient shall promptly retain
|
||||
the covered works for any reason be entered in any federal or state or login
|
||||
Restricted Laws appearing in the United States or any of its own information
|
||||
that is not disabled from a derivative work except as expressly permitted in
|
||||
this License, to the extent that they are in receiving the Software and Source
|
||||
Code or any exercise of the rights granted to You by this License or a
|
||||
Contributor made by the Licensor or are authorized to make a reasonable
|
||||
retirement by the courts of the courts located in Santa Clara County, California
|
||||
printed and related to the Work or “Company” and Apache Software Foundation. If
|
||||
the Licensor shall be entitled to reflect your rights to use the Software and
|
||||
the Software to exercise the rights granted to the recipient without a
|
||||
requirement to exercise the rights granted by the Agreement to the provision
|
||||
will begin will appear in such cases, you will use such information without such
|
||||
corporation shall be an officer with respect to any part of the Software or any
|
||||
portion thereof. Capitalized terms are included in the Initial Contributor and
|
||||
under no circumstances will license the Service at any time and for any direct,
|
||||
indirect, special, incidental, or consequential damages of or assist in
|
||||
connection with any Services or the registration purposes only to the extent
|
||||
that it includes any or all means including the processing of which you download
|
||||
any derivative work. Any of the purchases’ transmission purposes are made
|
||||
available, if any, in other circumstances, we may review the copyright notice.
|
||||
In the event that this Agreement is required to give us strict content. The
|
||||
inclusion of the other party hereunder may also notify you Intellectual Property
|
||||
Rights to any third party. This means that the Source Code exists of the Work
|
||||
will not charge a program available to you at any time. You must include a
|
||||
prominent statement that the Software is governed under a particular version of
|
||||
this Agreement. You must include a provision to the extent that there is no
|
||||
warranty for the content of others. You agree that the Recipient was appointed
|
||||
as a Contributor, (c) are effective until terminated by hereunder, then the
|
||||
registration are not disabled and not limited to, submit any Customer Data
|
||||
without the updated use of the Software and that no fee is released. You grant
|
||||
to Use Other Arbitration Rules for Diagnostic or Services may use or modify the
|
||||
Apple Software and Consolidated Apple Software or Services. The Company may have
|
||||
full risk as a product of the Compatible Source. A Contribution by the Licensor
|
||||
or by the updated Software under the following conditions we can redistribute
|
||||
any General Provision of this Agreement. If the Program is used in accordance
|
||||
with the terms of this Agreement, Customer may provide advertisements from your
|
||||
devices that clause you can your employer or a transaction or country that has
|
||||
been controlled by the arbitrator, that they will be useful of this Agreement.
|
||||
The term "Open Source Software is available in connection with the program, and
|
||||
you may not protect the combination of the Covered Code. You should like to
|
||||
select a user's rights to charge a copy of this License. I are Contributor's
|
||||
confidentiality of the exercise of the rights granted herein. Such a covered
|
||||
work is released as a consequence, the Licensor shall be eligible for a purpose
|
||||
or subcontractor of the person or entity to the user of the user, then the word
|
||||
"Application" means having the original fee for any reason; and that no patent
|
||||
license to more than fifty stated close of the license term. The terms of this
|
||||
License will the license terms and conditions set forth in Section 2.2 (OPEC)
|
||||
and You will not use the Software or any set of responsibility for any resulting
|
||||
information that the Original Code warrants that you have the right to disclose
|
||||
these information (or in the notification; or (iii) late use of the software or
|
||||
any third party to the three (50) days before such belief to the extent that it
|
||||
includes a court court obtains the rights granted by this License.
|
674
LICENSE.gpl
674
LICENSE.gpl
|
@@ -1,674 +0,0 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
{one line to give the program's name and a brief idea of what it does.}
|
||||
Copyright (C) {year} {name of author}
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
{project} Copyright (C) {year} {fullname}
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
|
@ -1,8 +0,0 @@
|
|||
META-LICENSE
|
||||
Version 1, June 21 2017
|
||||
|
||||
Any and all licenses may be applied to the software either individually
|
||||
or in concert. Any issues, ambiguities, paradoxes, or metaphysical quandries
|
||||
arising from this combination should be discussed with a local faith leader,
|
||||
hermit, or guru. The Oxford comma shall be used.
|
||||
|
22
LICENSE.mit
22
LICENSE.mit
|
@ -1,22 +0,0 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2017 Joseph Redmon
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
13
LICENSE.v1
13
LICENSE.v1
|
@ -1,13 +0,0 @@
|
|||
YOLO LICENSE
|
||||
Version 1, July 10 2015
|
||||
|
||||
THIS SOFTWARE LICENSE IS PROVIDED "ALL CAPS" SO THAT YOU KNOW IT IS SUPER
|
||||
SERIOUS AND YOU DON'T MESS AROUND WITH COPYRIGHT LAW BECAUSE YOU WILL GET IN
|
||||
TROUBLE HERE ARE SOME OTHER BUZZWORDS COMMONLY IN THESE THINGS WARRANTIES
|
||||
LIABILITY CONTRACT TORT LIABLE CLAIMS RESTRICTION MERCHANTABILITY SUBJECT TO
|
||||
THE FOLLOWING CONDITIONS:
|
||||
|
||||
1. #yolo
|
||||
2. #swag
|
||||
3. #blazeit
|
||||
|
73
Makefile
73
Makefile
|
@ -1,17 +1,9 @@
|
|||
GPU=0
|
||||
GPU=1
|
||||
CUDNN=0
|
||||
OPENCV=0
|
||||
OPENMP=0
|
||||
DEBUG=0
|
||||
|
||||
ARCH= -gencode arch=compute_30,code=sm_30 \
|
||||
-gencode arch=compute_35,code=sm_35 \
|
||||
-gencode arch=compute_50,code=[sm_50,compute_50] \
|
||||
-gencode arch=compute_52,code=[sm_52,compute_52]
|
||||
# -gencode arch=compute_20,code=[sm_20,sm_21] \ This one is deprecated?
|
||||
|
||||
# This is what I use, uncomment if you know your arch and want to specify
|
||||
# ARCH= -gencode arch=compute_52,code=compute_52
|
||||
|
||||
VPATH=./src/:./examples
|
||||
SLIB=libdarknet.so
|
||||
|
@ -19,49 +11,69 @@ ALIB=libdarknet.a
|
|||
EXEC=darknet
|
||||
OBJDIR=./obj/
|
||||
|
||||
CC=gcc
|
||||
CPP=g++
|
||||
NVCC=nvcc
|
||||
# 设置编译参数
|
||||
AR=ar
|
||||
ARFLAGS=rcs
|
||||
OPTS=-Ofast
|
||||
LDFLAGS= -lm -pthread
|
||||
LDFLAGS= -lm -pthread
|
||||
COMMON= -Iinclude/ -Isrc/
|
||||
CFLAGS=-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC
|
||||
CFLAGS= -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-write-strings -fPIC
|
||||
|
||||
ifeq ($(OPENMP), 1)
|
||||
CC=gcc
|
||||
CPP=g++
|
||||
ifeq ($(GPU), 1)
|
||||
HIP_ROOT_PATH=/opt/dtk-22.04.2
|
||||
|
||||
CC=${HIP_ROOT_PATH}/bin/hipcc
|
||||
CPP=${HIP_ROOT_PATH}/bin/hipcc
|
||||
NVCC=${HIP_ROOT_PATH}/bin/hipcc
|
||||
COMMON+= -DGPU -I${HIP_ROOT_PATH}/include/ -I${HIP_ROOT_PATH}/rocrand/include/ -I${HIP_ROOT_PATH}/hiprand/include/ -I${HIP_ROOT_PATH}/hipblas/include/
|
||||
CFLAGS+= -DGPU -D__HIP_PLATFORM_HCC__
|
||||
LDFLAGS+= -L${HIP_ROOT_PATH}/lib64 -lhipblas -lhiprand
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(OPENMP), 1)
|
||||
CFLAGS+= -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
ifeq ($(DEBUG), 1)
|
||||
OPTS=-O0 -g
|
||||
endif
|
||||
|
||||
CFLAGS+=$(OPTS)
|
||||
|
||||
ifeq ($(OPENCV), 1)
|
||||
ifeq ($(OPENCV), 1)
|
||||
COMMON+= -DOPENCV
|
||||
CFLAGS+= -DOPENCV
|
||||
LDFLAGS+= `pkg-config --libs opencv` -lstdc++
|
||||
COMMON+= `pkg-config --cflags opencv`
|
||||
COMMON+= `pkg-config --cflags opencv`
|
||||
endif
|
||||
|
||||
ifeq ($(GPU), 1)
|
||||
COMMON+= -DGPU -I/usr/local/cuda/include/
|
||||
CFLAGS+= -DGPU
|
||||
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
|
||||
endif
|
||||
#ifeq ($(GPU), 1)
|
||||
#COMMON+= -DGPU -I/usr/local/cuda/include/
|
||||
#CFLAGS+= -DGPU
|
||||
#LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
|
||||
#endif
|
||||
|
||||
ifeq ($(CUDNN), 1)
|
||||
COMMON+= -DCUDNN
|
||||
ifeq ($(CUDNN), 1)
|
||||
COMMON+= -DCUDNN
|
||||
CFLAGS+= -DCUDNN
|
||||
LDFLAGS+= -lcudnn
|
||||
endif
|
||||
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o image_opencv.o
|
||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o \
|
||||
softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o detection_layer.o route_layer.o upsample_layer.o box.o normalization_layer.o \
|
||||
avgpool_layer.o layer.o local_layer.o shortcut_layer.o logistic_layer.o activation_layer.o rnn_layer.o gru_layer.o crnn_layer.o demo.o batchnorm_layer.o region_layer.o \
|
||||
reorg_layer.o tree.o lstm_layer.o l2norm_layer.o yolo_layer.o iseg_layer.o
|
||||
ifeq ($(OPENCV), 1)
|
||||
OBJ+=image_opencv.o
|
||||
endif
|
||||
|
||||
EXECOBJA=captcha.o lsd.o super.o art.o tag.o cifar.o go.o rnn.o segmenter.o regressor.o classifier.o coco.o yolo.o detector.o nightmare.o instance-segmenter.o darknet.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
#EXECOBJA=darknet.o
|
||||
ifeq ($(GPU), 1)
|
||||
LDFLAGS+= -lstdc++
|
||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o avgpool_layer_kernels.o
|
||||
endif
|
||||
|
||||
|
@ -72,9 +84,8 @@ DEPS = $(wildcard src/*.h) Makefile include/darknet.h
|
|||
all: obj backup results $(SLIB) $(ALIB) $(EXEC)
|
||||
#all: obj results $(SLIB) $(ALIB) $(EXEC)
|
||||
|
||||
|
||||
$(EXEC): $(EXECOBJ) $(ALIB)
|
||||
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS) $(ALIB)
|
||||
$(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
$(ALIB): $(OBJS)
|
||||
$(AR) $(ARFLAGS) $@ $^
|
||||
|
@ -89,7 +100,7 @@ $(OBJDIR)%.o: %.c $(DEPS)
|
|||
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJDIR)%.o: %.cu $(DEPS)
|
||||
$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
|
||||
$(NVCC) -c $< -o $@ $(COMMON) $(CFLAGS)
|
||||
|
||||
obj:
|
||||
mkdir -p obj
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# Darknet
|
||||
|
||||
![Darknet Logo](http://pjreddie.com/media/files/darknet-black-small.png)
|
||||
|
||||
# Darknet #
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
classes= 20
|
||||
train = /home/pjreddie/data/voc/train.txt
|
||||
valid = /home/pjreddie/data/voc/2007_test.txt
|
||||
train = /home/public/DL_DATA/VOCdevkit0712/voc2007_2012/train.txt
|
||||
valid = /home/public/DL_DATA/VOCdevkit0712/voc2007_2012/2007_test.txt
|
||||
names = data/voc.names
|
||||
backup = backup
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=1
|
||||
subdivisions=1
|
||||
batch=64
|
||||
subdivisions=16
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=16
|
||||
|
||||
width=416
|
||||
height=416
|
||||
channels=3
|
||||
|
|
|
@ -17,7 +17,7 @@ hue=.1
|
|||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 500200
|
||||
max_batches = 500303
|
||||
policy=steps
|
||||
steps=400000,450000
|
||||
scales=.1,.1
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
model test:
|
||||
./darknet detect cfg/yolov3.cfg ./model_pretrained/yolov3.weights data/dog.jpg
|
||||
./darknet detect cfg/yolov3.cfg ./model_pretrained/yolov3.weights data/giraffe.jpg
|
||||
./darknet detect cfg/yolov3.cfg ../darknet_official/model_pretrained/yolov3.weights data/giraffe.jpg
|
||||
|
||||
|
||||
train model on voc:
|
||||
data preprocess:
|
||||
1.download voc data, 2007 and 2012, then copy file 'scripts/voc_label.py' to voc folder
|
||||
2.run voc_label.py, and change path of the cfg/voc.data
|
||||
*3.change parameters in 'cfg/yolov3.cfg' if need
|
||||
|
||||
train YOLOv3:
|
||||
./darknet detector train cfg/voc.data cfg/yolov3.cfg ./model_pretrained/yolov3.weights
|
||||
|
||||
test after train:
|
||||
./darknet detect cfg/yolov3.cfg ./backup/yolov3_final.weights data/dog.jpg
|
|
@ -7,7 +7,7 @@ void extend_data_truth(data *d, int n, float val)
|
|||
{
|
||||
int i, j;
|
||||
for(i = 0; i < d->y.rows; ++i){
|
||||
d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float));
|
||||
d->y.vals[i] = (float *) realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float));
|
||||
for(j = 0; j < n; ++j){
|
||||
d->y.vals[i][d->y.cols + j] = val;
|
||||
}
|
||||
|
@ -20,8 +20,8 @@ matrix network_loss_data(network *net, data test)
|
|||
int i,b;
|
||||
int k = 1;
|
||||
matrix pred = make_matrix(test.X.rows, k);
|
||||
float *X = calloc(net->batch*test.X.cols, sizeof(float));
|
||||
float *y = calloc(net->batch*test.y.cols, sizeof(float));
|
||||
float *X = (float*) calloc(net->batch*test.X.cols, sizeof(float));
|
||||
float *y = (float*) calloc(net->batch*test.y.cols, sizeof(float));
|
||||
for(i = 0; i < test.X.rows; i += net->batch){
|
||||
for(b = 0; b < net->batch; ++b){
|
||||
if(i+b == test.X.rows) break;
|
||||
|
@ -60,7 +60,7 @@ void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network **nets = calloc(ngpus, sizeof(network*));
|
||||
network **nets = (network **)calloc(ngpus, sizeof(network*));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
|
@ -152,7 +152,7 @@ void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
free_matrix(deltas);
|
||||
}
|
||||
}
|
||||
int *inds = calloc(resized.y.rows, sizeof(int));
|
||||
int *inds = (int *)calloc(resized.y.rows, sizeof(int));
|
||||
for(z = 0; z < resized.y.rows; ++z){
|
||||
int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs);
|
||||
inds[z] = index;
|
||||
|
@ -205,7 +205,7 @@ void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
avg_cls_loss = avg_cls_loss*.9 + closs*.1;
|
||||
avg_att_loss = avg_att_loss*.9 + aloss*.1;
|
||||
|
||||
printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
|
||||
printf("%ld, %.3f: Att: %f, %f avg, nclass: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
|
||||
if(*net->seen/N > epoch){
|
||||
epoch = *net->seen/N;
|
||||
char buff[256];
|
||||
|
@ -255,19 +255,19 @@ void validate_attention_single(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
float avg_acc = 0;
|
||||
float avg_topk = 0;
|
||||
int *indexes = calloc(topk, sizeof(int));
|
||||
int *indexes = (int *)calloc(topk, sizeof(int));
|
||||
int divs = 4;
|
||||
int size = 2;
|
||||
int extra = 0;
|
||||
float *avgs = calloc(classes, sizeof(float));
|
||||
int *inds = calloc(divs*divs, sizeof(int));
|
||||
float *avgs = (float*) calloc(classes, sizeof(float));
|
||||
int *inds = (int*) calloc(divs*divs, sizeof(int));
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
int class = -1;
|
||||
int nclass = -1;
|
||||
char *path = paths[i];
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(strstr(path, labels[j])){
|
||||
class = j;
|
||||
nclass = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -309,9 +309,9 @@ void validate_attention_single(char *datacfg, char *filename, char *weightfile)
|
|||
free_image(crop);
|
||||
top_k(pred, classes, topk, indexes);
|
||||
|
||||
if(indexes[0] == class) avg_acc += 1;
|
||||
if(indexes[0] == nclass) avg_acc += 1;
|
||||
for(j = 0; j < topk; ++j){
|
||||
if(indexes[j] == class) avg_topk += 1;
|
||||
if(indexes[j] == nclass) avg_topk += 1;
|
||||
}
|
||||
|
||||
printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
|
||||
|
@ -343,18 +343,18 @@ void validate_attention_multi(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
float avg_acc = 0;
|
||||
float avg_topk = 0;
|
||||
int *indexes = calloc(topk, sizeof(int));
|
||||
int *indexes = (int *)calloc(topk, sizeof(int));
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
int class = -1;
|
||||
int nclass = -1;
|
||||
char *path = paths[i];
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(strstr(path, labels[j])){
|
||||
class = j;
|
||||
nclass = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
float *pred = calloc(classes, sizeof(float));
|
||||
float *pred = (float*) calloc(classes, sizeof(float));
|
||||
image im = load_image_color(paths[i], 0, 0);
|
||||
for(j = 0; j < nscales; ++j){
|
||||
image r = resize_min(im, scales[j]);
|
||||
|
@ -370,9 +370,9 @@ void validate_attention_multi(char *datacfg, char *filename, char *weightfile)
|
|||
free_image(im);
|
||||
top_k(pred, classes, topk, indexes);
|
||||
free(pred);
|
||||
if(indexes[0] == class) avg_acc += 1;
|
||||
if(indexes[0] == nclass) avg_acc += 1;
|
||||
for(j = 0; j < topk; ++j){
|
||||
if(indexes[j] == class) avg_topk += 1;
|
||||
if(indexes[j] == nclass) avg_topk += 1;
|
||||
}
|
||||
|
||||
printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
|
||||
|
@ -394,7 +394,7 @@ void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *fil
|
|||
int i = 0;
|
||||
char **names = get_labels(name_list);
|
||||
clock_t time;
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
|
@ -120,8 +120,8 @@ void test_cifar_multi(char *filename, char *weightfile)
|
|||
axpy_cpu(10, 1, p, 1, pred, 1);
|
||||
|
||||
int index = max_index(pred, 10);
|
||||
int class = max_index(test.y.vals[i], 10);
|
||||
if(index == class) avg_acc += 1;
|
||||
int nclass = max_index(test.y.vals[i], 10);
|
||||
if(index == nclass) avg_acc += 1;
|
||||
free_image(im);
|
||||
printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1));
|
||||
}
|
||||
|
@ -154,16 +154,16 @@ char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","hors
|
|||
data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin");
|
||||
for(i = 0; i < train.X.rows; ++i){
|
||||
image im = float_to_image(32, 32, 3, train.X.vals[i]);
|
||||
int class = max_index(train.y.vals[i], 10);
|
||||
int nclass = max_index(train.y.vals[i], 10);
|
||||
char buff[256];
|
||||
sprintf(buff, "data/cifar/train/%d_%s",i,labels[class]);
|
||||
sprintf(buff, "data/cifar/train/%d_%s",i,labels[nclass]);
|
||||
save_image_options(im, buff, PNG, 0);
|
||||
}
|
||||
for(i = 0; i < test.X.rows; ++i){
|
||||
image im = float_to_image(32, 32, 3, test.X.vals[i]);
|
||||
int class = max_index(test.y.vals[i], 10);
|
||||
int nclass = max_index(test.y.vals[i], 10);
|
||||
char buff[256];
|
||||
sprintf(buff, "data/cifar/test/%d_%s",i,labels[class]);
|
||||
sprintf(buff, "data/cifar/test/%d_%s",i,labels[nclass]);
|
||||
save_image_options(im, buff, PNG, 0);
|
||||
}
|
||||
}
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
float *get_regression_values(char **labels, int n)
|
||||
{
|
||||
float *v = calloc(n, sizeof(float));
|
||||
float *v = (float*) calloc(n, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
char *p = strchr(labels[i], ' ');
|
||||
|
@ -23,7 +23,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network **nets = calloc(ngpus, sizeof(network*));
|
||||
network **nets = (network **)calloc(ngpus, sizeof(network*));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
|
@ -254,14 +254,14 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
float avg_acc = 0;
|
||||
float avg_topk = 0;
|
||||
int *indexes = calloc(topk, sizeof(int));
|
||||
int *indexes = (int *)calloc(topk, sizeof(int));
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
int class = -1;
|
||||
int nclass = -1;
|
||||
char *path = paths[i];
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(strstr(path, labels[j])){
|
||||
class = j;
|
||||
nclass = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -281,7 +281,7 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
|
|||
images[7] = crop_image(im, 0, 0, w, h);
|
||||
images[8] = crop_image(im, -shift, shift, w, h);
|
||||
images[9] = crop_image(im, shift, shift, w, h);
|
||||
float *pred = calloc(classes, sizeof(float));
|
||||
float *pred = (float*) calloc(classes, sizeof(float));
|
||||
for(j = 0; j < 10; ++j){
|
||||
float *p = network_predict(net, images[j].data);
|
||||
if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1);
|
||||
|
@ -291,9 +291,9 @@ void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
|
|||
free_image(im);
|
||||
top_k(pred, classes, topk, indexes);
|
||||
free(pred);
|
||||
if(indexes[0] == class) avg_acc += 1;
|
||||
if(indexes[0] == nclass) avg_acc += 1;
|
||||
for(j = 0; j < topk; ++j){
|
||||
if(indexes[j] == class) avg_topk += 1;
|
||||
if(indexes[j] == nclass) avg_topk += 1;
|
||||
}
|
||||
|
||||
printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
|
||||
|
@ -323,15 +323,15 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
float avg_acc = 0;
|
||||
float avg_topk = 0;
|
||||
int *indexes = calloc(topk, sizeof(int));
|
||||
int *indexes = (int *)calloc(topk, sizeof(int));
|
||||
|
||||
int size = net->w;
|
||||
for(i = 0; i < m; ++i){
|
||||
int class = -1;
|
||||
int nclass = -1;
|
||||
char *path = paths[i];
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(strstr(path, labels[j])){
|
||||
class = j;
|
||||
nclass = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -348,9 +348,9 @@ void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
|
|||
free_image(resized);
|
||||
top_k(pred, classes, topk, indexes);
|
||||
|
||||
if(indexes[0] == class) avg_acc += 1;
|
||||
if(indexes[0] == nclass) avg_acc += 1;
|
||||
for(j = 0; j < topk; ++j){
|
||||
if(indexes[j] == class) avg_topk += 1;
|
||||
if(indexes[j] == nclass) avg_topk += 1;
|
||||
}
|
||||
|
||||
printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
|
||||
|
@ -383,14 +383,14 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
|
|||
|
||||
float avg_acc = 0;
|
||||
float avg_topk = 0;
|
||||
int *indexes = calloc(topk, sizeof(int));
|
||||
int *indexes = (int *)calloc(topk, sizeof(int));
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
int class = -1;
|
||||
int nclass = -1;
|
||||
char *path = paths[i];
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(strstr(path, labels[j])){
|
||||
class = j;
|
||||
nclass = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -407,12 +407,12 @@ void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
|
|||
free_image(crop);
|
||||
top_k(pred, classes, topk, indexes);
|
||||
|
||||
if(indexes[0] == class) avg_acc += 1;
|
||||
if(indexes[0] == nclass) avg_acc += 1;
|
||||
for(j = 0; j < topk; ++j){
|
||||
if(indexes[j] == class) avg_topk += 1;
|
||||
if(indexes[j] == nclass) avg_topk += 1;
|
||||
}
|
||||
|
||||
printf("%s, %d, %f, %f, \n", paths[i], class, pred[0], pred[1]);
|
||||
printf("%s, %d, %f, %f, \n", paths[i], nclass, pred[0], pred[1]);
|
||||
printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
|
||||
}
|
||||
}
|
||||
|
@ -443,18 +443,18 @@ void validate_classifier_multi(char *datacfg, char *cfg, char *weights)
|
|||
|
||||
float avg_acc = 0;
|
||||
float avg_topk = 0;
|
||||
int *indexes = calloc(topk, sizeof(int));
|
||||
int *indexes = (int *)calloc(topk, sizeof(int));
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
int class = -1;
|
||||
int nclass = -1;
|
||||
char *path = paths[i];
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(strstr(path, labels[j])){
|
||||
class = j;
|
||||
nclass = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
float *pred = calloc(classes, sizeof(float));
|
||||
float *pred = (float*) calloc(classes, sizeof(float));
|
||||
image im = load_image_color(paths[i], 0, 0);
|
||||
for(j = 0; j < nscales; ++j){
|
||||
image r = resize_max(im, scales[j]);
|
||||
|
@ -470,9 +470,9 @@ void validate_classifier_multi(char *datacfg, char *cfg, char *weights)
|
|||
free_image(im);
|
||||
top_k(pred, classes, topk, indexes);
|
||||
free(pred);
|
||||
if(indexes[0] == class) avg_acc += 1;
|
||||
if(indexes[0] == nclass) avg_acc += 1;
|
||||
for(j = 0; j < topk; ++j){
|
||||
if(indexes[j] == class) avg_topk += 1;
|
||||
if(indexes[j] == nclass) avg_topk += 1;
|
||||
}
|
||||
|
||||
printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
|
||||
|
@ -494,7 +494,7 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena
|
|||
int i = 0;
|
||||
char **names = get_labels(name_list);
|
||||
clock_t time;
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
|
@ -572,7 +572,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
|
|||
int i = 0;
|
||||
char **names = get_labels(name_list);
|
||||
clock_t time;
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
|
@ -662,7 +662,7 @@ void csv_classifier(char *datacfg, char *cfgfile, char *weightfile)
|
|||
char **paths = (char **)list_to_array(plist);
|
||||
int m = plist->size;
|
||||
free_list(plist);
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
double time = what_time_is_it_now();
|
||||
|
@ -813,7 +813,7 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
|
|||
char *name_list = option_find_str(options, "names", 0);
|
||||
char **names = get_labels(name_list);
|
||||
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
|
||||
if(!cap) error("Couldn't connect to webcam.\n");
|
||||
//cvNamedWindow("Threat", CV_WINDOW_NORMAL);
|
||||
|
@ -935,7 +935,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
|
|||
char *name_list = option_find_str(options, "names", 0);
|
||||
char **names = get_labels(name_list);
|
||||
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
|
||||
if(!cap) error("Couldn't connect to webcam.\n");
|
||||
float fps = 0;
|
||||
|
@ -1005,7 +1005,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||
char *name_list = option_find_str(options, "names", label_list);
|
||||
char **names = get_labels(name_list);
|
||||
|
||||
int *indexes = calloc(top, sizeof(int));
|
||||
int *indexes = (int *)calloc(top, sizeof(int));
|
||||
|
||||
if(!cap) error("Couldn't connect to webcam.\n");
|
||||
float fps = 0;
|
|
@ -155,11 +155,11 @@ void validate_coco(char *cfg, char *weights)
|
|||
float iou_thresh = .5;
|
||||
|
||||
int nthreads = 8;
|
||||
image *val = calloc(nthreads, sizeof(image));
|
||||
image *val_resized = calloc(nthreads, sizeof(image));
|
||||
image *buf = calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
||||
image *val = (image *)calloc(nthreads, sizeof(image));
|
||||
image *val_resized = (image *)calloc(nthreads, sizeof(image));
|
||||
image *buf = (image *)calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = (image *)calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = (pthread_t *)calloc(nthreads, sizeof(pthread_t));
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net->w;
|
||||
|
@ -225,7 +225,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
|
|||
int side = l.side;
|
||||
|
||||
int j, k;
|
||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
||||
FILE **fps = (FILE **)calloc(classes, sizeof(FILE *));
|
||||
for(j = 0; j < classes; ++j){
|
||||
char buff[1024];
|
||||
snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]);
|
|
@ -5,101 +5,117 @@
|
|||
#include <stdio.h>
|
||||
|
||||
extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
|
||||
extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen);
|
||||
|
||||
extern void
|
||||
test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh,
|
||||
char *outfile, int fullscreen);
|
||||
|
||||
extern void run_yolo(int argc, char **argv);
|
||||
|
||||
extern void run_detector(int argc, char **argv);
|
||||
|
||||
extern void run_coco(int argc, char **argv);
|
||||
|
||||
extern void run_nightmare(int argc, char **argv);
|
||||
|
||||
extern void run_classifier(int argc, char **argv);
|
||||
|
||||
extern void run_regressor(int argc, char **argv);
|
||||
|
||||
extern void run_segmenter(int argc, char **argv);
|
||||
|
||||
extern void run_isegmenter(int argc, char **argv);
|
||||
|
||||
extern void run_char_rnn(int argc, char **argv);
|
||||
|
||||
extern void run_tag(int argc, char **argv);
|
||||
|
||||
extern void run_cifar(int argc, char **argv);
|
||||
|
||||
extern void run_go(int argc, char **argv);
|
||||
|
||||
extern void run_art(int argc, char **argv);
|
||||
|
||||
extern void run_super(int argc, char **argv);
|
||||
|
||||
extern void run_lsd(int argc, char **argv);
|
||||
|
||||
void average(int argc, char *argv[])
|
||||
{
|
||||
void average(int argc, char *argv[]) {
|
||||
char *cfgfile = argv[2];
|
||||
char *outfile = argv[3];
|
||||
gpu_index = -1;
|
||||
network *net = parse_network_cfg(cfgfile);
|
||||
network *sum = parse_network_cfg(cfgfile);
|
||||
|
||||
char *weightfile = argv[4];
|
||||
char *weightfile = argv[4];
|
||||
load_weights(sum, weightfile);
|
||||
|
||||
int i, j;
|
||||
int n = argc - 5;
|
||||
for(i = 0; i < n; ++i){
|
||||
weightfile = argv[i+5];
|
||||
for (i = 0; i < n; ++i) {
|
||||
weightfile = argv[i + 5];
|
||||
load_weights(net, weightfile);
|
||||
for(j = 0; j < net->n; ++j){
|
||||
for (j = 0; j < net->n; ++j) {
|
||||
layer l = net->layers[j];
|
||||
layer out = sum->layers[j];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
int num = l.n*l.c*l.size*l.size;
|
||||
if (l.type == CONVOLUTIONAL) {
|
||||
int num = l.n * l.c * l.size * l.size;
|
||||
axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
|
||||
axpy_cpu(num, 1, l.weights, 1, out.weights, 1);
|
||||
if(l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1);
|
||||
axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1);
|
||||
axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1);
|
||||
}
|
||||
}
|
||||
if(l.type == CONNECTED){
|
||||
if (l.type == CONNECTED) {
|
||||
axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1);
|
||||
axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1);
|
||||
axpy_cpu(l.outputs * l.inputs, 1, l.weights, 1, out.weights, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
n = n+1;
|
||||
for(j = 0; j < net->n; ++j){
|
||||
n = n + 1;
|
||||
for (j = 0; j < net->n; ++j) {
|
||||
layer l = sum->layers[j];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
int num = l.n*l.c*l.size*l.size;
|
||||
scal_cpu(l.n, 1./n, l.biases, 1);
|
||||
scal_cpu(num, 1./n, l.weights, 1);
|
||||
if(l.batch_normalize){
|
||||
scal_cpu(l.n, 1./n, l.scales, 1);
|
||||
scal_cpu(l.n, 1./n, l.rolling_mean, 1);
|
||||
scal_cpu(l.n, 1./n, l.rolling_variance, 1);
|
||||
}
|
||||
if (l.type == CONVOLUTIONAL) {
|
||||
int num = l.n * l.c * l.size * l.size;
|
||||
scal_cpu(l.n, 1. / n, l.biases, 1);
|
||||
scal_cpu(num, 1. / n, l.weights, 1);
|
||||
if (l.batch_normalize) {
|
||||
scal_cpu(l.n, 1. / n, l.scales, 1);
|
||||
scal_cpu(l.n, 1. / n, l.rolling_mean, 1);
|
||||
scal_cpu(l.n, 1. / n, l.rolling_variance, 1);
|
||||
}
|
||||
}
|
||||
if(l.type == CONNECTED){
|
||||
scal_cpu(l.outputs, 1./n, l.biases, 1);
|
||||
scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
|
||||
if (l.type == CONNECTED) {
|
||||
scal_cpu(l.outputs, 1. / n, l.biases, 1);
|
||||
scal_cpu(l.outputs * l.inputs, 1. / n, l.weights, 1);
|
||||
}
|
||||
}
|
||||
save_weights(sum, outfile);
|
||||
}
|
||||
|
||||
long numops(network *net)
|
||||
{
|
||||
long numops(network *net) {
|
||||
int i;
|
||||
long ops = 0;
|
||||
for(i = 0; i < net->n; ++i){
|
||||
for (i = 0; i < net->n; ++i) {
|
||||
layer l = net->layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w;
|
||||
} else if(l.type == CONNECTED){
|
||||
if (l.type == CONVOLUTIONAL) {
|
||||
ops += 2l * l.n * l.size * l.size * l.c / l.groups * l.out_h * l.out_w;
|
||||
} else if (l.type == CONNECTED) {
|
||||
ops += 2l * l.inputs * l.outputs;
|
||||
} else if (l.type == RNN){
|
||||
} else if (l.type == RNN) {
|
||||
ops += 2l * l.input_layer->inputs * l.input_layer->outputs;
|
||||
ops += 2l * l.self_layer->inputs * l.self_layer->outputs;
|
||||
ops += 2l * l.output_layer->inputs * l.output_layer->outputs;
|
||||
} else if (l.type == GRU){
|
||||
} else if (l.type == GRU) {
|
||||
ops += 2l * l.uz->inputs * l.uz->outputs;
|
||||
ops += 2l * l.uh->inputs * l.uh->outputs;
|
||||
ops += 2l * l.ur->inputs * l.ur->outputs;
|
||||
ops += 2l * l.wz->inputs * l.wz->outputs;
|
||||
ops += 2l * l.wh->inputs * l.wh->outputs;
|
||||
ops += 2l * l.wr->inputs * l.wr->outputs;
|
||||
} else if (l.type == LSTM){
|
||||
} else if (l.type == LSTM) {
|
||||
ops += 2l * l.uf->inputs * l.uf->outputs;
|
||||
ops += 2l * l.ui->inputs * l.ui->outputs;
|
||||
ops += 2l * l.ug->inputs * l.ug->outputs;
|
||||
|
@ -113,67 +129,63 @@ long numops(network *net)
|
|||
return ops;
|
||||
}
|
||||
|
||||
void speed(char *cfgfile, int tics)
|
||||
{
|
||||
void speed(char *cfgfile, int tics) {
|
||||
if (tics == 0) tics = 1000;
|
||||
network *net = parse_network_cfg(cfgfile);
|
||||
set_batch_network(net, 1);
|
||||
int i;
|
||||
double time=what_time_is_it_now();
|
||||
image im = make_image(net->w, net->h, net->c*net->batch);
|
||||
for(i = 0; i < tics; ++i){
|
||||
double time = what_time_is_it_now();
|
||||
image im = make_image(net->w, net->h, net->c * net->batch);
|
||||
for (i = 0; i < tics; ++i) {
|
||||
network_predict(net, im.data);
|
||||
}
|
||||
double t = what_time_is_it_now() - time;
|
||||
long ops = numops(net);
|
||||
printf("\n%d evals, %f Seconds\n", tics, t);
|
||||
printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
|
||||
printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t);
|
||||
printf("Speed: %f sec/eval\n", t/tics);
|
||||
printf("Speed: %f Hz\n", tics/t);
|
||||
printf("Floating Point Operations: %.2f Bn\n", (float) ops / 1000000000.);
|
||||
printf("FLOPS: %.2f Bn\n", (float) ops / 1000000000. * tics / t);
|
||||
printf("Speed: %f sec/eval\n", t / tics);
|
||||
printf("Speed: %f Hz\n", tics / t);
|
||||
}
|
||||
|
||||
void operations(char *cfgfile)
|
||||
{
|
||||
void operations(char *cfgfile) {
|
||||
gpu_index = -1;
|
||||
network *net = parse_network_cfg(cfgfile);
|
||||
long ops = numops(net);
|
||||
printf("Floating Point Operations: %ld\n", ops);
|
||||
printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
|
||||
printf("Floating Point Operations: %.2f Bn\n", (float) ops / 1000000000.);
|
||||
}
|
||||
|
||||
void oneoff(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void oneoff(char *cfgfile, char *weightfile, char *outfile) {
|
||||
gpu_index = -1;
|
||||
network *net = parse_network_cfg(cfgfile);
|
||||
int oldn = net->layers[net->n - 2].n;
|
||||
int c = net->layers[net->n - 2].c;
|
||||
scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1);
|
||||
scal_cpu(oldn * c, .1, net->layers[net->n - 2].weights, 1);
|
||||
scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1);
|
||||
net->layers[net->n - 2].n = 11921;
|
||||
net->layers[net->n - 2].biases += 5;
|
||||
net->layers[net->n - 2].weights += 5*c;
|
||||
if(weightfile){
|
||||
net->layers[net->n - 2].weights += 5 * c;
|
||||
if (weightfile) {
|
||||
load_weights(net, weightfile);
|
||||
}
|
||||
net->layers[net->n - 2].biases -= 5;
|
||||
net->layers[net->n - 2].weights -= 5*c;
|
||||
net->layers[net->n - 2].weights -= 5 * c;
|
||||
net->layers[net->n - 2].n = oldn;
|
||||
printf("%d\n", oldn);
|
||||
layer l = net->layers[net->n - 2];
|
||||
copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1);
|
||||
copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1);
|
||||
copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1);
|
||||
copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1);
|
||||
copy_cpu(l.n / 3, l.biases, 1, l.biases + l.n / 3, 1);
|
||||
copy_cpu(l.n / 3, l.biases, 1, l.biases + 2 * l.n / 3, 1);
|
||||
copy_cpu(l.n / 3 * l.c, l.weights, 1, l.weights + l.n / 3 * l.c, 1);
|
||||
copy_cpu(l.n / 3 * l.c, l.weights, 1, l.weights + 2 * l.n / 3 * l.c, 1);
|
||||
*net->seen = 0;
|
||||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
|
||||
{
|
||||
void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l) {
|
||||
gpu_index = -1;
|
||||
network *net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
if (weightfile) {
|
||||
load_weights_upto(net, weightfile, 0, net->n);
|
||||
load_weights_upto(net, weightfile, l, net->n);
|
||||
}
|
||||
|
@ -181,25 +193,23 @@ void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
|
|||
save_weights_upto(net, outfile, net->n);
|
||||
}
|
||||
|
||||
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
|
||||
{
|
||||
void partial(char *cfgfile, char *weightfile, char *outfile, int max) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 1);
|
||||
save_weights_upto(net, outfile, max);
|
||||
}
|
||||
|
||||
void print_weights(char *cfgfile, char *weightfile, int n)
|
||||
{
|
||||
void print_weights(char *cfgfile, char *weightfile, int n) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 1);
|
||||
layer l = net->layers[n];
|
||||
int i, j;
|
||||
//printf("[");
|
||||
for(i = 0; i < l.n; ++i){
|
||||
for (i = 0; i < l.n; ++i) {
|
||||
//printf("[");
|
||||
for(j = 0; j < l.size*l.size*l.c; ++j){
|
||||
for (j = 0; j < l.size * l.size * l.c; ++j) {
|
||||
//if(j > 0) printf(",");
|
||||
printf("%g ", l.weights[i*l.size*l.size*l.c + j]);
|
||||
printf("%g ", l.weights[i * l.size * l.size * l.c + j]);
|
||||
}
|
||||
printf("\n");
|
||||
//printf("]%s\n", (i == l.n-1)?"":",");
|
||||
|
@ -207,14 +217,13 @@ void print_weights(char *cfgfile, char *weightfile, int n)
|
|||
//printf("]");
|
||||
}
|
||||
|
||||
void rescale_net(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void rescale_net(char *cfgfile, char *weightfile, char *outfile) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
int i;
|
||||
for(i = 0; i < net->n; ++i){
|
||||
for (i = 0; i < net->n; ++i) {
|
||||
layer l = net->layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
if (l.type == CONVOLUTIONAL) {
|
||||
rescale_weights(l, 2, -.5);
|
||||
break;
|
||||
}
|
||||
|
@ -222,14 +231,13 @@ void rescale_net(char *cfgfile, char *weightfile, char *outfile)
|
|||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
int i;
|
||||
for(i = 0; i < net->n; ++i){
|
||||
for (i = 0; i < net->n; ++i) {
|
||||
layer l = net->layers[i];
|
||||
if(l.type == CONVOLUTIONAL){
|
||||
if (l.type == CONVOLUTIONAL) {
|
||||
rgbgr_weights(l);
|
||||
break;
|
||||
}
|
||||
|
@ -237,8 +245,7 @@ void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
|
|||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
int i;
|
||||
|
@ -262,27 +269,25 @@ void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
|
|||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
layer normalize_layer(layer l, int n)
|
||||
{
|
||||
layer normalize_layer(layer l, int n) {
|
||||
int j;
|
||||
l.batch_normalize=1;
|
||||
l.scales = calloc(n, sizeof(float));
|
||||
for(j = 0; j < n; ++j){
|
||||
l.batch_normalize = 1;
|
||||
l.scales = (float *) calloc(n, sizeof(float));
|
||||
for (j = 0; j < n; ++j) {
|
||||
l.scales[j] = 1;
|
||||
}
|
||||
l.rolling_mean = calloc(n, sizeof(float));
|
||||
l.rolling_variance = calloc(n, sizeof(float));
|
||||
l.rolling_mean = (float *) calloc(n, sizeof(float));
|
||||
l.rolling_variance = (float *) calloc(n, sizeof(float));
|
||||
return l;
|
||||
}
|
||||
|
||||
void normalize_net(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void normalize_net(char *cfgfile, char *weightfile, char *outfile) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
int i;
|
||||
for(i = 0; i < net->n; ++i){
|
||||
for (i = 0; i < net->n; ++i) {
|
||||
layer l = net->layers[i];
|
||||
if(l.type == CONVOLUTIONAL && !l.batch_normalize){
|
||||
if (l.type == CONVOLUTIONAL && !l.batch_normalize) {
|
||||
net->layers[i] = normalize_layer(l, l.n);
|
||||
}
|
||||
if (l.type == CONNECTED && !l.batch_normalize) {
|
||||
|
@ -295,14 +300,13 @@ void normalize_net(char *cfgfile, char *weightfile, char *outfile)
|
|||
*l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
|
||||
*l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
|
||||
*l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
|
||||
net->layers[i].batch_normalize=1;
|
||||
net->layers[i].batch_normalize = 1;
|
||||
}
|
||||
}
|
||||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void statistics_net(char *cfgfile, char *weightfile)
|
||||
{
|
||||
void statistics_net(char *cfgfile, char *weightfile) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
int i;
|
||||
|
@ -331,8 +335,7 @@ void statistics_net(char *cfgfile, char *weightfile)
|
|||
}
|
||||
}
|
||||
|
||||
void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void denormalize_net(char *cfgfile, char *weightfile, char *outfile) {
|
||||
gpu_index = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
int i;
|
||||
|
@ -340,11 +343,11 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
|
|||
layer l = net->layers[i];
|
||||
if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) {
|
||||
denormalize_convolutional_layer(l);
|
||||
net->layers[i].batch_normalize=0;
|
||||
net->layers[i].batch_normalize = 0;
|
||||
}
|
||||
if (l.type == CONNECTED && l.batch_normalize) {
|
||||
denormalize_connected_layer(l);
|
||||
net->layers[i].batch_normalize=0;
|
||||
net->layers[i].batch_normalize = 0;
|
||||
}
|
||||
if (l.type == GRU && l.batch_normalize) {
|
||||
denormalize_connected_layer(*l.input_z_layer);
|
||||
|
@ -359,28 +362,27 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
|
|||
l.state_z_layer->batch_normalize = 0;
|
||||
l.state_r_layer->batch_normalize = 0;
|
||||
l.state_h_layer->batch_normalize = 0;
|
||||
net->layers[i].batch_normalize=0;
|
||||
net->layers[i].batch_normalize = 0;
|
||||
}
|
||||
}
|
||||
save_weights(net, outfile);
|
||||
}
|
||||
|
||||
void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
|
||||
{
|
||||
void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix) {
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
image *ims = get_weights(net->layers[0]);
|
||||
int n = net->layers[0].n;
|
||||
int z;
|
||||
for(z = 0; z < num; ++z){
|
||||
for (z = 0; z < num; ++z) {
|
||||
image im = make_image(h, w, 3);
|
||||
fill_image(im, .5);
|
||||
int i;
|
||||
for(i = 0; i < 100; ++i){
|
||||
image r = copy_image(ims[rand()%n]);
|
||||
rotate_image_cw(r, rand()%4);
|
||||
for (i = 0; i < 100; ++i) {
|
||||
image r = copy_image(ims[rand() % n]);
|
||||
rotate_image_cw(r, rand() % 4);
|
||||
random_distort_image(r, 1, 1.5, 1.5);
|
||||
int dx = rand()%(w-r.w);
|
||||
int dy = rand()%(h-r.h);
|
||||
int dx = rand() % (w - r.w);
|
||||
int dy = rand() % (h - r.h);
|
||||
ghost_image(r, im, dx, dy);
|
||||
free_image(r);
|
||||
}
|
||||
|
@ -391,23 +393,22 @@ void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
|
|||
}
|
||||
}
|
||||
|
||||
void visualize(char *cfgfile, char *weightfile)
|
||||
{
|
||||
void visualize(char *cfgfile, char *weightfile) {
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
visualize_network(net);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int main(int argc, char **argv) {
|
||||
printf("argv is %s\n", argv[1]);
|
||||
//test_resize("data/bad.jpg");
|
||||
//test_box();
|
||||
//test_convolutional_layer();
|
||||
if(argc < 2){
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "usage: %s <function>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
gpu_index = find_int_arg(argc, argv, "-i", 0);
|
||||
if(find_arg(argc, argv, "-nogpu")) {
|
||||
if (find_arg(argc, argv, "-nogpu")) {
|
||||
gpu_index = -1;
|
||||
}
|
||||
|
||||
|
@ -419,81 +420,83 @@ int main(int argc, char **argv)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (0 == strcmp(argv[1], "average")){
|
||||
printf("gpu_index is %d\n", gpu_index);
|
||||
|
||||
if (0 == strcmp(argv[1], "average")) {
|
||||
average(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "yolo")){
|
||||
} else if (0 == strcmp(argv[1], "yolo")) {
|
||||
run_yolo(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "super")){
|
||||
} else if (0 == strcmp(argv[1], "super")) {
|
||||
run_super(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "lsd")){
|
||||
} else if (0 == strcmp(argv[1], "lsd")) {
|
||||
run_lsd(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "detector")){
|
||||
} else if (0 == strcmp(argv[1], "detector")) {
|
||||
run_detector(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "detect")){
|
||||
} else if (0 == strcmp(argv[1], "detect")) {
|
||||
float thresh = find_float_arg(argc, argv, "-thresh", .5);
|
||||
char *filename = (argc > 4) ? argv[4]: 0;
|
||||
char *filename = (argc > 4) ? argv[4] : 0;
|
||||
char *outfile = find_char_arg(argc, argv, "-out", 0);
|
||||
int fullscreen = find_arg(argc, argv, "-fullscreen");
|
||||
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
|
||||
} else if (0 == strcmp(argv[1], "cifar")){
|
||||
} else if (0 == strcmp(argv[1], "cifar")) {
|
||||
run_cifar(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "go")){
|
||||
} else if (0 == strcmp(argv[1], "go")) {
|
||||
run_go(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "rnn")){
|
||||
} else if (0 == strcmp(argv[1], "rnn")) {
|
||||
run_char_rnn(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "coco")){
|
||||
} else if (0 == strcmp(argv[1], "coco")) {
|
||||
run_coco(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "classify")){
|
||||
} else if (0 == strcmp(argv[1], "classify")) {
|
||||
predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
|
||||
} else if (0 == strcmp(argv[1], "classifier")){
|
||||
} else if (0 == strcmp(argv[1], "classifier")) {
|
||||
run_classifier(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "regressor")){
|
||||
} else if (0 == strcmp(argv[1], "regressor")) {
|
||||
run_regressor(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "isegmenter")){
|
||||
} else if (0 == strcmp(argv[1], "isegmenter")) {
|
||||
run_isegmenter(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "segmenter")){
|
||||
} else if (0 == strcmp(argv[1], "segmenter")) {
|
||||
run_segmenter(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "art")){
|
||||
} else if (0 == strcmp(argv[1], "art")) {
|
||||
run_art(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "tag")){
|
||||
} else if (0 == strcmp(argv[1], "tag")) {
|
||||
run_tag(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "3d")){
|
||||
} else if (0 == strcmp(argv[1], "3d")) {
|
||||
composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
|
||||
} else if (0 == strcmp(argv[1], "test")){
|
||||
} else if (0 == strcmp(argv[1], "test")) {
|
||||
test_resize(argv[2]);
|
||||
} else if (0 == strcmp(argv[1], "nightmare")){
|
||||
} else if (0 == strcmp(argv[1], "nightmare")) {
|
||||
run_nightmare(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "rgbgr")){
|
||||
} else if (0 == strcmp(argv[1], "rgbgr")) {
|
||||
rgbgr_net(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "reset")){
|
||||
} else if (0 == strcmp(argv[1], "reset")) {
|
||||
reset_normalize_net(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "denormalize")){
|
||||
} else if (0 == strcmp(argv[1], "denormalize")) {
|
||||
denormalize_net(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "statistics")){
|
||||
} else if (0 == strcmp(argv[1], "statistics")) {
|
||||
statistics_net(argv[2], argv[3]);
|
||||
} else if (0 == strcmp(argv[1], "normalize")){
|
||||
} else if (0 == strcmp(argv[1], "normalize")) {
|
||||
normalize_net(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "rescale")){
|
||||
} else if (0 == strcmp(argv[1], "rescale")) {
|
||||
rescale_net(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "ops")){
|
||||
} else if (0 == strcmp(argv[1], "ops")) {
|
||||
operations(argv[2]);
|
||||
} else if (0 == strcmp(argv[1], "speed")){
|
||||
} else if (0 == strcmp(argv[1], "speed")) {
|
||||
speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
|
||||
} else if (0 == strcmp(argv[1], "oneoff")){
|
||||
} else if (0 == strcmp(argv[1], "oneoff")) {
|
||||
oneoff(argv[2], argv[3], argv[4]);
|
||||
} else if (0 == strcmp(argv[1], "oneoff2")){
|
||||
} else if (0 == strcmp(argv[1], "oneoff2")) {
|
||||
oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
|
||||
} else if (0 == strcmp(argv[1], "print")){
|
||||
} else if (0 == strcmp(argv[1], "print")) {
|
||||
print_weights(argv[2], argv[3], atoi(argv[4]));
|
||||
} else if (0 == strcmp(argv[1], "partial")){
|
||||
} else if (0 == strcmp(argv[1], "partial")) {
|
||||
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
|
||||
} else if (0 == strcmp(argv[1], "average")){
|
||||
} else if (0 == strcmp(argv[1], "average")) {
|
||||
average(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "visualize")){
|
||||
} else if (0 == strcmp(argv[1], "visualize")) {
|
||||
visualize(argv[2], (argc > 3) ? argv[3] : 0);
|
||||
} else if (0 == strcmp(argv[1], "mkimg")){
|
||||
} else if (0 == strcmp(argv[1], "mkimg")) {
|
||||
mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
|
||||
} else if (0 == strcmp(argv[1], "imtest")){
|
||||
} else if (0 == strcmp(argv[1], "imtest")) {
|
||||
test_resize(argv[2]);
|
||||
} else {
|
||||
fprintf(stderr, "Not an option: %s\n", argv[1]);
|
|
@ -1,56 +0,0 @@
|
|||
# Stupid python path shit.
|
||||
# Instead just add darknet.py to somewhere in your python path
|
||||
# OK actually that might not be a great idea, idk, work in progress
|
||||
# Use at your own risk. or don't, i don't care
|
||||
|
||||
from scipy.misc import imread
|
||||
import cv2
|
||||
|
||||
def array_to_image(arr):
|
||||
arr = arr.transpose(2,0,1)
|
||||
c = arr.shape[0]
|
||||
h = arr.shape[1]
|
||||
w = arr.shape[2]
|
||||
arr = (arr/255.0).flatten()
|
||||
data = dn.c_array(dn.c_float, arr)
|
||||
im = dn.IMAGE(w,h,c,data)
|
||||
return im
|
||||
|
||||
def detect2(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
|
||||
boxes = dn.make_boxes(net)
|
||||
probs = dn.make_probs(net)
|
||||
num = dn.num_boxes(net)
|
||||
dn.network_detect(net, image, thresh, hier_thresh, nms, boxes, probs)
|
||||
res = []
|
||||
for j in range(num):
|
||||
for i in range(meta.classes):
|
||||
if probs[j][i] > 0:
|
||||
res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h)))
|
||||
res = sorted(res, key=lambda x: -x[1])
|
||||
dn.free_ptrs(dn.cast(probs, dn.POINTER(dn.c_void_p)), num)
|
||||
return res
|
||||
|
||||
import sys, os
|
||||
sys.path.append(os.path.join(os.getcwd(),'python/'))
|
||||
|
||||
import darknet as dn
|
||||
|
||||
# Darknet
|
||||
net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0)
|
||||
meta = dn.load_meta("cfg/coco.data")
|
||||
r = dn.detect(net, meta, "data/dog.jpg")
|
||||
print r
|
||||
|
||||
# scipy
|
||||
arr= imread('data/dog.jpg')
|
||||
im = array_to_image(arr)
|
||||
r = detect2(net, meta, im)
|
||||
print r
|
||||
|
||||
# OpenCV
|
||||
arr = cv2.imread('data/dog.jpg')
|
||||
im = array_to_image(arr)
|
||||
dn.rgbgr_image(im)
|
||||
r = detect2(net, meta, im)
|
||||
print r
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
#include "darknet.h"
|
||||
|
||||
static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
|
||||
static int coco_ids[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28,
|
||||
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84,
|
||||
85, 86, 87, 88, 89, 90};
|
||||
|
||||
|
||||
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
||||
{
|
||||
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) {
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *train_images = option_find_str(options, "train", "data/train.list");
|
||||
char *backup_directory = option_find_str(options, "backup", "/backup/");
|
||||
|
@ -13,12 +15,12 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
float avg_loss = -1;
|
||||
network **nets = calloc(ngpus, sizeof(network));
|
||||
network **nets = (network **) calloc(ngpus, sizeof(network));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
int i;
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
|
@ -40,7 +42,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
|
||||
list *plist = get_paths(train_images);
|
||||
//int N = plist->size;
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
load_args args = get_base_args(net);
|
||||
args.coords = l.coords;
|
||||
|
@ -59,11 +61,11 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
double time;
|
||||
int count = 0;
|
||||
//while(i*imgs < N*120){
|
||||
while(get_current_batch(net) < net->max_batches){
|
||||
if(l.random && count++%10 == 0){
|
||||
while (get_current_batch(net) < net->max_batches) {
|
||||
if (l.random && count++ % 10 == 0) {
|
||||
printf("Resizing\n");
|
||||
int dim = (rand() % 10 + 10) * 32;
|
||||
if (get_current_batch(net)+200 > net->max_batches) dim = 608;
|
||||
if (get_current_batch(net) + 200 > net->max_batches) dim = 608;
|
||||
//int dim = (rand() % 4 + 16) * 32;
|
||||
printf("%d\n", dim);
|
||||
args.w = dim;
|
||||
|
@ -74,13 +76,13 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
free_data(train);
|
||||
load_thread = load_data(args);
|
||||
|
||||
#pragma omp parallel for
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
#pragma omp parallel for
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
resize_network(nets[i], dim, dim);
|
||||
}
|
||||
net = nets[0];
|
||||
}
|
||||
time=what_time_is_it_now();
|
||||
time = what_time_is_it_now();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
|
@ -109,9 +111,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
}
|
||||
*/
|
||||
|
||||
printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
|
||||
printf("Loaded: %lf seconds\n", what_time_is_it_now() - time);
|
||||
|
||||
time=what_time_is_it_now();
|
||||
time = what_time_is_it_now();
|
||||
float loss = 0;
|
||||
#ifdef GPU
|
||||
if(ngpus == 1){
|
||||
|
@ -123,11 +125,12 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
loss = train_network(net, train);
|
||||
#endif
|
||||
if (avg_loss < 0) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
avg_loss = avg_loss * .9 + loss * .1;
|
||||
|
||||
i = get_current_batch(net);
|
||||
printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
|
||||
if(i%100==0){
|
||||
printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss,
|
||||
get_current_rate(net), what_time_is_it_now() - time, i * imgs);
|
||||
if (i % 100 == 0) {
|
||||
#ifdef GPU
|
||||
if(ngpus != 1) sync_nets(nets, ngpus, 0);
|
||||
#endif
|
||||
|
@ -135,7 +138,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(i%10000==0 || (i < 1000 && i%100 == 0)){
|
||||
if (i % 10000 == 0 || (i < 1000 && i % 100 == 0)) {
|
||||
#ifdef GPU
|
||||
if(ngpus != 1) sync_nets(nets, ngpus, 0);
|
||||
#endif
|
||||
|
@ -154,23 +157,21 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
|
|||
}
|
||||
|
||||
|
||||
static int get_coco_image_id(char *filename)
|
||||
{
|
||||
static int get_coco_image_id(char *filename) {
|
||||
char *p = strrchr(filename, '/');
|
||||
char *c = strrchr(filename, '_');
|
||||
if(c) p = c;
|
||||
return atoi(p+1);
|
||||
if (c) p = c;
|
||||
return atoi(p + 1);
|
||||
}
|
||||
|
||||
static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h)
|
||||
{
|
||||
static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) {
|
||||
int i, j;
|
||||
int image_id = get_coco_image_id(image_path);
|
||||
for(i = 0; i < num_boxes; ++i){
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;
|
||||
for (i = 0; i < num_boxes; ++i) {
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.;
|
||||
|
||||
if (xmin < 0) xmin = 0;
|
||||
if (ymin < 0) ymin = 0;
|
||||
|
@ -182,57 +183,58 @@ static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_box
|
|||
float bw = xmax - xmin;
|
||||
float bh = ymax - ymin;
|
||||
|
||||
for(j = 0; j < classes; ++j){
|
||||
if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]);
|
||||
for (j = 0; j < classes; ++j) {
|
||||
if (dets[i].prob[j])
|
||||
fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n",
|
||||
image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h)
|
||||
{
|
||||
void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) {
|
||||
int i, j;
|
||||
for(i = 0; i < total; ++i){
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1;
|
||||
for (i = 0; i < total; ++i) {
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w / 2. + 1;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w / 2. + 1;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h / 2. + 1;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h / 2. + 1;
|
||||
|
||||
if (xmin < 1) xmin = 1;
|
||||
if (ymin < 1) ymin = 1;
|
||||
if (xmax > w) xmax = w;
|
||||
if (ymax > h) ymax = h;
|
||||
|
||||
for(j = 0; j < classes; ++j){
|
||||
if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
|
||||
xmin, ymin, xmax, ymax);
|
||||
for (j = 0; j < classes; ++j) {
|
||||
if (dets[i].prob[j])
|
||||
fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
|
||||
xmin, ymin, xmax, ymax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h)
|
||||
{
|
||||
void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) {
|
||||
int i, j;
|
||||
for(i = 0; i < total; ++i){
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;
|
||||
for (i = 0; i < total; ++i) {
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.;
|
||||
|
||||
if (xmin < 0) xmin = 0;
|
||||
if (ymin < 0) ymin = 0;
|
||||
if (xmax > w) xmax = w;
|
||||
if (ymax > h) ymax = h;
|
||||
|
||||
for(j = 0; j < classes; ++j){
|
||||
int class = j;
|
||||
if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class],
|
||||
xmin, ymin, xmax, ymax);
|
||||
for (j = 0; j < classes; ++j) {
|
||||
int nclass = j;
|
||||
if (dets[i].prob[nclass])
|
||||
fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[nclass],
|
||||
xmin, ymin, xmax, ymax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile) {
|
||||
int j;
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *valid_images = option_find_str(options, "valid", "data/train.list");
|
||||
|
@ -249,9 +251,9 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
|
|||
srand(time(0));
|
||||
|
||||
list *plist = get_paths(valid_images);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
int classes = l.classes;
|
||||
|
||||
char buff[1024];
|
||||
|
@ -260,42 +262,42 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
|
|||
FILE **fps = 0;
|
||||
int coco = 0;
|
||||
int imagenet = 0;
|
||||
if(0==strcmp(type, "coco")){
|
||||
if(!outfile) outfile = "coco_results";
|
||||
if (0 == strcmp(type, "coco")) {
|
||||
if (!outfile) outfile = "coco_results";
|
||||
snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
|
||||
fp = fopen(buff, "w");
|
||||
fprintf(fp, "[\n");
|
||||
coco = 1;
|
||||
} else if(0==strcmp(type, "imagenet")){
|
||||
if(!outfile) outfile = "imagenet-detection";
|
||||
} else if (0 == strcmp(type, "imagenet")) {
|
||||
if (!outfile) outfile = "imagenet-detection";
|
||||
snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
|
||||
fp = fopen(buff, "w");
|
||||
imagenet = 1;
|
||||
classes = 200;
|
||||
} else {
|
||||
if(!outfile) outfile = "comp4_det_test_";
|
||||
fps = calloc(classes, sizeof(FILE *));
|
||||
for(j = 0; j < classes; ++j){
|
||||
if (!outfile) outfile = "comp4_det_test_";
|
||||
fps = (FILE **) calloc(classes, sizeof(FILE * ));
|
||||
for (j = 0; j < classes; ++j) {
|
||||
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
}
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
int i = 0;
|
||||
int t;
|
||||
|
||||
float thresh = .005;
|
||||
float nms = .45;
|
||||
|
||||
int nthreads = 4;
|
||||
image *val = calloc(nthreads, sizeof(image));
|
||||
image *val_resized = calloc(nthreads, sizeof(image));
|
||||
image *buf = calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
||||
image *val = (image *) calloc(nthreads, sizeof(image));
|
||||
image *val_resized = (image *) calloc(nthreads, sizeof(image));
|
||||
image *buf = (image *) calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = (image *) calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = (pthread_t *) calloc(nthreads, sizeof(pthread_t));
|
||||
|
||||
image input = make_image(net->w, net->h, net->c*2);
|
||||
image input = make_image(net->w, net->h, net->c * 2);
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net->w;
|
||||
|
@ -303,32 +305,32 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
|
|||
//args.type = IMAGE_DATA;
|
||||
args.type = LETTERBOX_DATA;
|
||||
|
||||
for(t = 0; t < nthreads; ++t){
|
||||
args.path = paths[i+t];
|
||||
for (t = 0; t < nthreads; ++t) {
|
||||
args.path = paths[i + t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
double start = what_time_is_it_now();
|
||||
for(i = nthreads; i < m+nthreads; i += nthreads){
|
||||
for (i = nthreads; i < m + nthreads; i += nthreads) {
|
||||
fprintf(stderr, "%d\n", i);
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
|
||||
pthread_join(thr[t], 0);
|
||||
val[t] = buf[t];
|
||||
val_resized[t] = buf_resized[t];
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t < m; ++t){
|
||||
args.path = paths[i+t];
|
||||
for (t = 0; t < nthreads && i + t < m; ++t) {
|
||||
args.path = paths[i + t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
char *path = paths[i+t-nthreads];
|
||||
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
|
||||
char *path = paths[i + t - nthreads];
|
||||
char *id = basecfg(path);
|
||||
copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1);
|
||||
copy_cpu(net->w * net->h * net->c, val_resized[t].data, 1, input.data, 1);
|
||||
flip_image(val_resized[t]);
|
||||
copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1);
|
||||
copy_cpu(net->w * net->h * net->c, val_resized[t].data, 1, input.data + net->w * net->h * net->c, 1);
|
||||
|
||||
network_predict(net, input.data);
|
||||
int w = val[t].w;
|
||||
|
@ -336,10 +338,10 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
|
|||
int num = 0;
|
||||
detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num);
|
||||
if (nms) do_nms_sort(dets, num, classes, nms);
|
||||
if (coco){
|
||||
if (coco) {
|
||||
print_cocos(fp, path, dets, num, classes, w, h);
|
||||
} else if (imagenet){
|
||||
print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h);
|
||||
} else if (imagenet) {
|
||||
print_imagenet_detections(fp, i + t - nthreads + 1, dets, num, classes, w, h);
|
||||
} else {
|
||||
print_detector_detections(fps, id, dets, num, classes, w, h);
|
||||
}
|
||||
|
@ -349,11 +351,11 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
|
|||
free_image(val_resized[t]);
|
||||
}
|
||||
}
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(fps) fclose(fps[j]);
|
||||
for (j = 0; j < classes; ++j) {
|
||||
if (fps) fclose(fps[j]);
|
||||
}
|
||||
if(coco){
|
||||
fseek(fp, -2, SEEK_CUR);
|
||||
if (coco) {
|
||||
fseek(fp, -2, SEEK_CUR);
|
||||
fprintf(fp, "\n]\n");
|
||||
fclose(fp);
|
||||
}
|
||||
|
@ -361,8 +363,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
|
|||
}
|
||||
|
||||
|
||||
void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
|
||||
{
|
||||
void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) {
|
||||
int j;
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *valid_images = option_find_str(options, "valid", "data/train.list");
|
||||
|
@ -379,9 +380,9 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
srand(time(0));
|
||||
|
||||
list *plist = get_paths(valid_images);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
int classes = l.classes;
|
||||
|
||||
char buff[1024];
|
||||
|
@ -390,22 +391,22 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
FILE **fps = 0;
|
||||
int coco = 0;
|
||||
int imagenet = 0;
|
||||
if(0==strcmp(type, "coco")){
|
||||
if(!outfile) outfile = "coco_results";
|
||||
if (0 == strcmp(type, "coco")) {
|
||||
if (!outfile) outfile = "coco_results";
|
||||
snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
|
||||
fp = fopen(buff, "w");
|
||||
fprintf(fp, "[\n");
|
||||
coco = 1;
|
||||
} else if(0==strcmp(type, "imagenet")){
|
||||
if(!outfile) outfile = "imagenet-detection";
|
||||
} else if (0 == strcmp(type, "imagenet")) {
|
||||
if (!outfile) outfile = "imagenet-detection";
|
||||
snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
|
||||
fp = fopen(buff, "w");
|
||||
imagenet = 1;
|
||||
classes = 200;
|
||||
} else {
|
||||
if(!outfile) outfile = "comp4_det_test_";
|
||||
fps = calloc(classes, sizeof(FILE *));
|
||||
for(j = 0; j < classes; ++j){
|
||||
if (!outfile) outfile = "comp4_det_test_";
|
||||
fps = (FILE **)calloc(classes, sizeof(FILE * ));
|
||||
for (j = 0; j < classes; ++j) {
|
||||
snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
|
@ -413,18 +414,18 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
int i = 0;
|
||||
int t;
|
||||
|
||||
float thresh = .005;
|
||||
float nms = .45;
|
||||
|
||||
int nthreads = 4;
|
||||
image *val = calloc(nthreads, sizeof(image));
|
||||
image *val_resized = calloc(nthreads, sizeof(image));
|
||||
image *buf = calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
||||
image *val = (image *)calloc(nthreads, sizeof(image));
|
||||
image *val_resized = (image *)calloc(nthreads, sizeof(image));
|
||||
image *buf = (image *)calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = (image *)calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = (pthread_t *)calloc(nthreads, sizeof(pthread_t));
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net->w;
|
||||
|
@ -432,28 +433,28 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
//args.type = IMAGE_DATA;
|
||||
args.type = LETTERBOX_DATA;
|
||||
|
||||
for(t = 0; t < nthreads; ++t){
|
||||
args.path = paths[i+t];
|
||||
for (t = 0; t < nthreads; ++t) {
|
||||
args.path = paths[i + t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
double start = what_time_is_it_now();
|
||||
for(i = nthreads; i < m+nthreads; i += nthreads){
|
||||
for (i = nthreads; i < m + nthreads; i += nthreads) {
|
||||
fprintf(stderr, "%d\n", i);
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
|
||||
pthread_join(thr[t], 0);
|
||||
val[t] = buf[t];
|
||||
val_resized[t] = buf_resized[t];
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t < m; ++t){
|
||||
args.path = paths[i+t];
|
||||
for (t = 0; t < nthreads && i + t < m; ++t) {
|
||||
args.path = paths[i + t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
char *path = paths[i+t-nthreads];
|
||||
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
|
||||
char *path = paths[i + t - nthreads];
|
||||
char *id = basecfg(path);
|
||||
float *X = val_resized[t].data;
|
||||
network_predict(net, X);
|
||||
|
@ -462,10 +463,10 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
int nboxes = 0;
|
||||
detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes);
|
||||
if (nms) do_nms_sort(dets, nboxes, classes, nms);
|
||||
if (coco){
|
||||
if (coco) {
|
||||
print_cocos(fp, path, dets, nboxes, classes, w, h);
|
||||
} else if (imagenet){
|
||||
print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h);
|
||||
} else if (imagenet) {
|
||||
print_imagenet_detections(fp, i + t - nthreads + 1, dets, nboxes, classes, w, h);
|
||||
} else {
|
||||
print_detector_detections(fps, id, dets, nboxes, classes, w, h);
|
||||
}
|
||||
|
@ -475,33 +476,32 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
|
|||
free_image(val_resized[t]);
|
||||
}
|
||||
}
|
||||
for(j = 0; j < classes; ++j){
|
||||
if(fps) fclose(fps[j]);
|
||||
for (j = 0; j < classes; ++j) {
|
||||
if (fps) fclose(fps[j]);
|
||||
}
|
||||
if(coco){
|
||||
fseek(fp, -2, SEEK_CUR);
|
||||
if (coco) {
|
||||
fseek(fp, -2, SEEK_CUR);
|
||||
fprintf(fp, "\n]\n");
|
||||
fclose(fp);
|
||||
}
|
||||
fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
|
||||
}
|
||||
|
||||
void validate_detector_recall(char *cfgfile, char *weightfile)
|
||||
{
|
||||
void validate_detector_recall(char *cfgfile, char *weightfile) {
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
set_batch_network(net, 1);
|
||||
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
|
||||
srand(time(0));
|
||||
|
||||
list *plist = get_paths("data/coco_val_5k.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
|
||||
int j, k;
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
int i = 0;
|
||||
|
||||
float thresh = .001;
|
||||
float iou_thresh = .5;
|
||||
|
@ -512,7 +512,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
|||
int proposals = 0;
|
||||
float avg_iou = 0;
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
for (i = 0; i < m; ++i) {
|
||||
char *path = paths[i];
|
||||
image orig = load_image_color(path, 0, 0);
|
||||
image sized = resize_image(orig, net->w, net->h);
|
||||
|
@ -530,8 +530,8 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
|||
|
||||
int num_labels = 0;
|
||||
box_label *truth = read_boxes(labelpath, &num_labels);
|
||||
for(k = 0; k < nboxes; ++k){
|
||||
if(dets[k].objectness > thresh){
|
||||
for (k = 0; k < nboxes; ++k) {
|
||||
if (dets[k].objectness > thresh) {
|
||||
++proposals;
|
||||
}
|
||||
}
|
||||
|
@ -539,19 +539,20 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
|||
++total;
|
||||
box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
|
||||
float best_iou = 0;
|
||||
for(k = 0; k < l.w*l.h*l.n; ++k){
|
||||
for (k = 0; k < l.w * l.h * l.n; ++k) {
|
||||
float iou = box_iou(dets[k].bbox, t);
|
||||
if(dets[k].objectness > thresh && iou > best_iou){
|
||||
if (dets[k].objectness > thresh && iou > best_iou) {
|
||||
best_iou = iou;
|
||||
}
|
||||
}
|
||||
avg_iou += best_iou;
|
||||
if(best_iou > iou_thresh){
|
||||
if (best_iou > iou_thresh) {
|
||||
++correct;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
|
||||
fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total,
|
||||
(float) proposals / (i + 1), avg_iou * 100 / total, 100. * correct / total);
|
||||
free(id);
|
||||
free_image(orig);
|
||||
free_image(sized);
|
||||
|
@ -559,8 +560,8 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
|
|||
}
|
||||
|
||||
|
||||
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
|
||||
{
|
||||
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh,
|
||||
char *outfile, int fullscreen) {
|
||||
list *options = read_data_cfg(datacfg);
|
||||
char *name_list = option_find_str(options, "names", "data/names.list");
|
||||
char **names = get_labels(name_list);
|
||||
|
@ -572,30 +573,30 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||
double time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
float nms=.45;
|
||||
while(1){
|
||||
if(filename){
|
||||
float nms = .45;
|
||||
while (1) {
|
||||
if (filename) {
|
||||
strncpy(input, filename, 256);
|
||||
} else {
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
if (!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input,0,0);
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = letterbox_image(im, net->w, net->h);
|
||||
//image sized = resize_image(im, net->w, net->h);
|
||||
//image sized2 = resize_max(im, net->w);
|
||||
//image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
|
||||
//resize_network(net, sized.w, sized.h);
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
|
||||
|
||||
float *X = sized.data;
|
||||
time=what_time_is_it_now();
|
||||
time = what_time_is_it_now();
|
||||
network_predict(net, X);
|
||||
printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
|
||||
printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now() - time);
|
||||
int nboxes = 0;
|
||||
detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
|
||||
//printf("%d\n", nboxes);
|
||||
|
@ -603,10 +604,9 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||
if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
|
||||
draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
|
||||
free_detections(dets, nboxes);
|
||||
if(outfile){
|
||||
if (outfile) {
|
||||
save_image(im, outfile);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
save_image(im, "predictions");
|
||||
#ifdef OPENCV
|
||||
make_window("predictions", 512, 512, 0);
|
||||
|
@ -621,7 +621,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
|||
}
|
||||
|
||||
/*
|
||||
void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
|
||||
void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int nclass, float thresh, int skip)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
char *base = basecfg(cfgfile);
|
||||
|
@ -668,7 +668,7 @@ void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||
if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
|
||||
|
||||
for(i = 0; i < nboxes; ++i){
|
||||
if(dets[i].prob[class] > thresh){
|
||||
if(dets[i].prob[nclass] > thresh){
|
||||
box b = dets[i].bbox;
|
||||
int left = b.x-b.w/2.;
|
||||
int top = b.y-b.h/2.;
|
||||
|
@ -694,7 +694,7 @@ void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
|
|||
#endif
|
||||
}
|
||||
|
||||
void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
|
||||
void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int nclass, float thresh, int skip)
|
||||
{
|
||||
#ifdef OPENCV
|
||||
char *base = basecfg(cfgfile);
|
||||
|
@ -744,7 +744,7 @@ void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_in
|
|||
if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
|
||||
|
||||
for(i = 0; i < nboxes; ++i){
|
||||
if(dets[i].prob[class] > thresh){
|
||||
if(dets[i].prob[nclass] > thresh){
|
||||
box b = dets[i].bbox;
|
||||
int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h;
|
||||
int dx = b.x*in.w-size/2.;
|
||||
|
@ -786,15 +786,14 @@ void network_detect(network *net, image im, float thresh, float hier_thresh, flo
|
|||
}
|
||||
*/
|
||||
|
||||
void run_detector(int argc, char **argv)
|
||||
{
|
||||
void run_detector(int argc, char **argv) {
|
||||
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
|
||||
float thresh = find_float_arg(argc, argv, "-thresh", .5);
|
||||
float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int frame_skip = find_int_arg(argc, argv, "-s", 0);
|
||||
int avg = find_int_arg(argc, argv, "-avg", 3);
|
||||
if(argc < 4){
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
@ -803,18 +802,18 @@ void run_detector(int argc, char **argv)
|
|||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
if (gpu_list) {
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus = (int *) calloc(ngpus, sizeof(int));
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
gpu_list = strchr(gpu_list, ',') + 1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
|
@ -827,24 +826,26 @@ void run_detector(int argc, char **argv)
|
|||
int width = find_int_arg(argc, argv, "-w", 0);
|
||||
int height = find_int_arg(argc, argv, "-h", 0);
|
||||
int fps = find_int_arg(argc, argv, "-fps", 0);
|
||||
//int class = find_int_arg(argc, argv, "-class", 0);
|
||||
//int nclass = find_int_arg(argc, argv, "-nclass", 0);
|
||||
|
||||
char *datacfg = argv[3];
|
||||
char *cfg = argv[4];
|
||||
char *weights = (argc > 5) ? argv[5] : 0;
|
||||
char *filename = (argc > 6) ? argv[6]: 0;
|
||||
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
|
||||
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
|
||||
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
|
||||
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "demo")) {
|
||||
char *filename = (argc > 6) ? argv[6] : 0;
|
||||
if (0 == strcmp(argv[2], "test"))
|
||||
test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
|
||||
else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
|
||||
else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
|
||||
else if (0 == strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
|
||||
else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "demo")) {
|
||||
list *options = read_data_cfg(datacfg);
|
||||
int classes = option_find_int(options, "classes", 20);
|
||||
char *name_list = option_find_str(options, "names", "data/names.list");
|
||||
char **names = get_labels(name_list);
|
||||
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
|
||||
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width,
|
||||
height, fps, fullscreen);
|
||||
}
|
||||
//else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
|
||||
//else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
|
||||
//else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, nclass, thresh, frame_skip);
|
||||
//else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, nclass, thresh, frame_skip);
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
# Stupid python path shit.
|
||||
# Instead just add darknet.py to somewhere in your python path
|
||||
# OK actually that might not be a great idea, idk, work in progress
|
||||
# Use at your own risk. or don't, i don't care
|
||||
|
||||
import sys, os
|
||||
sys.path.append(os.path.join(os.getcwd(),'python/'))
|
||||
|
||||
import darknet as dn
|
||||
import pdb
|
||||
|
||||
dn.set_gpu(0)
|
||||
net = dn.load_net("cfg/yolo-thor.cfg", "/home/pjreddie/backup/yolo-thor_final.weights", 0)
|
||||
meta = dn.load_meta("cfg/thor.data")
|
||||
r = dn.detect(net, meta, "data/bedroom.jpg")
|
||||
print r
|
||||
|
||||
# And then down here you could detect a lot more images like:
|
||||
r = dn.detect(net, meta, "data/eagle.jpg")
|
||||
print r
|
||||
r = dn.detect(net, meta, "data/giraffe.jpg")
|
||||
print r
|
||||
r = dn.detect(net, meta, "data/horses.jpg")
|
||||
print r
|
||||
r = dn.detect(net, meta, "data/person.jpg")
|
||||
print r
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -3,19 +3,19 @@
|
|||
#include <assert.h>
|
||||
|
||||
void normalize_image2(image p);
|
||||
void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display)
|
||||
{
|
||||
|
||||
void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) {
|
||||
int i;
|
||||
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network **nets = calloc(ngpus, sizeof(network*));
|
||||
network **nets = (network **) calloc(ngpus, sizeof(network * ));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
|
@ -29,9 +29,9 @@ void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
|
||||
image embed = pred;
|
||||
embed.c = 3;
|
||||
embed.data += embed.w*embed.h*80;
|
||||
embed.data += embed.w * embed.h * 80;
|
||||
|
||||
int div = net->w/pred.w;
|
||||
int div = net->w / pred.w;
|
||||
assert(pred.w * div == net->w);
|
||||
assert(pred.h * div == net->h);
|
||||
|
||||
|
@ -44,7 +44,7 @@ void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
char *train_list = option_find_str(options, "train", "data/train.list");
|
||||
|
||||
list *plist = get_paths(train_list);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
|
||||
|
@ -76,15 +76,15 @@ void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
args.d = &buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
int epoch = (*net->seen)/N;
|
||||
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
|
||||
int epoch = (*net->seen) / N;
|
||||
while (get_current_batch(net) < net->max_batches || net->max_batches == 0) {
|
||||
double time = what_time_is_it_now();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
|
||||
printf("Loaded: %lf seconds\n", what_time_is_it_now() - time);
|
||||
time = what_time_is_it_now();
|
||||
|
||||
float loss = 0;
|
||||
|
@ -97,9 +97,10 @@ void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
#else
|
||||
loss = train_network(net, train);
|
||||
#endif
|
||||
if(display){
|
||||
image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
|
||||
image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
|
||||
if (display) {
|
||||
image tr = float_to_image(net->w / div, net->h / div, 80,
|
||||
train.y.vals[net->batch * (net->subdivisions - 1)]);
|
||||
image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch * (net->subdivisions - 1)]);
|
||||
pred.c = 80;
|
||||
image mask = mask_to_rgb(tr);
|
||||
image prmask = mask_to_rgb(pred);
|
||||
|
@ -114,19 +115,21 @@ void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
free_image(mask);
|
||||
free_image(prmask);
|
||||
}
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
|
||||
if (avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss * .9 + loss * .1;
|
||||
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net),
|
||||
(float) (*net->seen) / N, loss, avg_loss, get_current_rate(net), what_time_is_it_now() - time,
|
||||
*net->seen);
|
||||
free_data(train);
|
||||
if(*net->seen/N > epoch){
|
||||
epoch = *net->seen/N;
|
||||
if (*net->seen / N > epoch) {
|
||||
epoch = *net->seen / N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(get_current_batch(net)%100 == 0){
|
||||
if (get_current_batch(net) % 100 == 0) {
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
|
@ -135,13 +138,12 @@ void train_isegmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
save_weights(net, buff);
|
||||
|
||||
free_network(net);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_ptrs((void **) paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename)
|
||||
{
|
||||
void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename) {
|
||||
network *net = load_network(cfg, weights, 0);
|
||||
set_batch_network(net, 1);
|
||||
srand(2222222);
|
||||
|
@ -149,26 +151,26 @@ void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename
|
|||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
while (1) {
|
||||
if (filename) {
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
} else {
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
if (!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = letterbox_image(im, net->w, net->h);
|
||||
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
time = clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
image pred = get_network_image(net);
|
||||
image prmask = mask_to_rgb(pred);
|
||||
printf("Predicted: %f\n", predictions[0]);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock() - time));
|
||||
show_image(sized, "orig", 1);
|
||||
show_image(prmask, "pred", 0);
|
||||
free_image(im);
|
||||
|
@ -179,8 +181,7 @@ void predict_isegmenter(char *datafile, char *cfg, char *weights, char *filename
|
|||
}
|
||||
|
||||
|
||||
void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename)
|
||||
{
|
||||
void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) {
|
||||
#ifdef OPENCV
|
||||
printf("Classifier Demo\n");
|
||||
network *net = load_network(cfg, weights, 0);
|
||||
|
@ -222,9 +223,8 @@ void demo_isegmenter(char *datacfg, char *cfg, char *weights, int cam_index, con
|
|||
}
|
||||
|
||||
|
||||
void run_isegmenter(int argc, char **argv)
|
||||
{
|
||||
if(argc < 4){
|
||||
void run_isegmenter(int argc, char **argv) {
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
@ -233,18 +233,18 @@ void run_isegmenter(int argc, char **argv)
|
|||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
if (gpu_list) {
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus = (int *) calloc(ngpus, sizeof(int));
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
gpu_list = strchr(gpu_list, ',') + 1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
|
@ -258,10 +258,10 @@ void run_isegmenter(int argc, char **argv)
|
|||
char *data = argv[3];
|
||||
char *cfg = argv[4];
|
||||
char *weights = (argc > 5) ? argv[5] : 0;
|
||||
char *filename = (argc > 6) ? argv[6]: 0;
|
||||
if(0==strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename);
|
||||
else if(0==strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename);
|
||||
char *filename = (argc > 6) ? argv[6] : 0;
|
||||
if (0 == strcmp(argv[2], "test")) predict_isegmenter(data, cfg, weights, filename);
|
||||
else if (0 == strcmp(argv[2], "train")) train_isegmenter(data, cfg, weights, gpus, ngpus, clear, display);
|
||||
else if (0 == strcmp(argv[2], "demo")) demo_isegmenter(data, cfg, weights, cam_index, filename);
|
||||
}
|
||||
|
||||
|
|
@ -682,7 +682,7 @@ void train_dcgan(char *cfg, char *weight, char *acfg, char *aweight, int clear,
|
|||
//float orig_rate = anet->learning_rate;
|
||||
|
||||
int i, j, k;
|
||||
layer imlayer = {0};
|
||||
layer imlayer = {(LAYER_TYPE)0};
|
||||
for (i = 0; i < gnet->n; ++i) {
|
||||
if (gnet->layers[i].out_c == 3) {
|
||||
imlayer = gnet->layers[i];
|
||||
|
@ -878,7 +878,7 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
|
|||
network *anet = load_network(acfg, aweight, clear);
|
||||
|
||||
int i, j, k;
|
||||
layer imlayer = {0};
|
||||
layer imlayer = {(LAYER_TYPE)0};
|
||||
for (i = 0; i < net->n; ++i) {
|
||||
if (net->layers[i].out_c == 3) {
|
||||
imlayer = net->layers[i];
|
||||
|
@ -914,8 +914,8 @@ void train_colorizer(char *cfg, char *weight, char *acfg, char *aweight, int cle
|
|||
//int y_size = x_size;
|
||||
net->delta = 0;
|
||||
net->train = 1;
|
||||
float *pixs = calloc(x_size, sizeof(float));
|
||||
float *graypixs = calloc(x_size, sizeof(float));
|
||||
float *pixs = (float *) calloc(x_size, sizeof(float));
|
||||
float *graypixs = (float *) calloc(x_size, sizeof(float));
|
||||
//float *y = calloc(y_size, sizeof(float));
|
||||
|
||||
//int ay_size = anet->outputs*anet->batch;
|
|
@ -2,19 +2,18 @@
|
|||
#include <sys/time.h>
|
||||
#include <assert.h>
|
||||
|
||||
void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
|
||||
{
|
||||
void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear) {
|
||||
int i;
|
||||
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network **nets = calloc(ngpus, sizeof(network*));
|
||||
network **nets = (network **) calloc(ngpus, sizeof(network * ));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
|
@ -35,7 +34,7 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
int classes = option_find_int(options, "classes", 1);
|
||||
|
||||
list *plist = get_paths(train_list);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
clock_t time;
|
||||
|
@ -46,8 +45,8 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
args.threads = 32;
|
||||
args.classes = classes;
|
||||
|
||||
args.min = net->min_ratio*net->w;
|
||||
args.max = net->max_ratio*net->w;
|
||||
args.min = net->min_ratio * net->w;
|
||||
args.max = net->max_ratio * net->w;
|
||||
args.angle = net->angle;
|
||||
args.aspect = net->aspect;
|
||||
args.exposure = net->exposure;
|
||||
|
@ -66,16 +65,16 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
args.d = &buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
int epoch = (*net->seen)/N;
|
||||
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
|
||||
time=clock();
|
||||
int epoch = (*net->seen) / N;
|
||||
while (get_current_batch(net) < net->max_batches || net->max_batches == 0) {
|
||||
time = clock();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
printf("Loaded: %lf seconds\n", sec(clock() - time));
|
||||
time = clock();
|
||||
|
||||
float loss = 0;
|
||||
#ifdef GPU
|
||||
|
@ -87,19 +86,20 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
#else
|
||||
loss = train_network(net, train);
|
||||
#endif
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net->seen);
|
||||
if (avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss * .9 + loss * .1;
|
||||
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net),
|
||||
(float) (*net->seen) / N, loss, avg_loss, get_current_rate(net), sec(clock() - time), *net->seen);
|
||||
free_data(train);
|
||||
if(*net->seen/N > epoch){
|
||||
epoch = *net->seen/N;
|
||||
if (*net->seen / N > epoch) {
|
||||
epoch = *net->seen / N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(get_current_batch(net)%100 == 0){
|
||||
if (get_current_batch(net) % 100 == 0) {
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
|
@ -108,13 +108,12 @@ void train_regressor(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
save_weights(net, buff);
|
||||
|
||||
free_network(net);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_ptrs((void **) paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void predict_regressor(char *cfgfile, char *weightfile, char *filename)
|
||||
{
|
||||
void predict_regressor(char *cfgfile, char *weightfile, char *filename) {
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
set_batch_network(net, 1);
|
||||
srand(2222222);
|
||||
|
@ -122,24 +121,24 @@ void predict_regressor(char *cfgfile, char *weightfile, char *filename)
|
|||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
while (1) {
|
||||
if (filename) {
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
} else {
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
if (!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = letterbox_image(im, net->w, net->h);
|
||||
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
time = clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
printf("Predicted: %f\n", predictions[0]);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock() - time));
|
||||
free_image(im);
|
||||
free_image(sized);
|
||||
if (filename) break;
|
||||
|
@ -147,8 +146,7 @@ void predict_regressor(char *cfgfile, char *weightfile, char *filename)
|
|||
}
|
||||
|
||||
|
||||
void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
|
||||
{
|
||||
void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) {
|
||||
#ifdef OPENCV
|
||||
printf("Regressor Demo\n");
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
|
@ -196,9 +194,8 @@ void demo_regressor(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
|
|||
}
|
||||
|
||||
|
||||
void run_regressor(int argc, char **argv)
|
||||
{
|
||||
if(argc < 4){
|
||||
void run_regressor(int argc, char **argv) {
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
@ -207,18 +204,18 @@ void run_regressor(int argc, char **argv)
|
|||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
if (gpu_list) {
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus = (int *) calloc(ngpus, sizeof(int));
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
gpu_list = strchr(gpu_list, ',') + 1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
|
@ -231,10 +228,10 @@ void run_regressor(int argc, char **argv)
|
|||
char *data = argv[3];
|
||||
char *cfg = argv[4];
|
||||
char *weights = (argc > 5) ? argv[5] : 0;
|
||||
char *filename = (argc > 6) ? argv[6]: 0;
|
||||
if(0==strcmp(argv[2], "test")) predict_regressor(data, cfg, weights);
|
||||
else if(0==strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename);
|
||||
char *filename = (argc > 6) ? argv[6] : 0;
|
||||
if (0 == strcmp(argv[2], "test")) predict_regressor(data, cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "train")) train_regressor(data, cfg, weights, gpus, ngpus, clear);
|
||||
else if (0 == strcmp(argv[2], "demo")) demo_regressor(data, cfg, weights, cam_index, filename);
|
||||
}
|
||||
|
||||
|
|
@ -11,7 +11,7 @@ unsigned char **load_files(char *filename, int *n)
|
|||
{
|
||||
list *paths = get_paths(filename);
|
||||
*n = paths->size;
|
||||
unsigned char **contents = calloc(*n, sizeof(char *));
|
||||
unsigned char **contents = (unsigned char **)calloc(*n, sizeof(char *));
|
||||
int i;
|
||||
node *x = paths->front;
|
||||
for(i = 0; i < *n; ++i){
|
||||
|
@ -26,20 +26,20 @@ int *read_tokenized_data(char *filename, size_t *read)
|
|||
size_t size = 512;
|
||||
size_t count = 0;
|
||||
FILE *fp = fopen(filename, "r");
|
||||
int *d = calloc(size, sizeof(int));
|
||||
int *d = (int *)calloc(size, sizeof(int));
|
||||
int n, one;
|
||||
one = fscanf(fp, "%d", &n);
|
||||
while(one == 1){
|
||||
++count;
|
||||
if(count > size){
|
||||
size = size*2;
|
||||
d = realloc(d, size*sizeof(int));
|
||||
d = (int *) realloc(d, size*sizeof(int));
|
||||
}
|
||||
d[count-1] = n;
|
||||
one = fscanf(fp, "%d", &n);
|
||||
}
|
||||
fclose(fp);
|
||||
d = realloc(d, count*sizeof(int));
|
||||
d = (int *) realloc(d, count*sizeof(int));
|
||||
*read = count;
|
||||
return d;
|
||||
}
|
||||
|
@ -49,19 +49,19 @@ char **read_tokens(char *filename, size_t *read)
|
|||
size_t size = 512;
|
||||
size_t count = 0;
|
||||
FILE *fp = fopen(filename, "r");
|
||||
char **d = calloc(size, sizeof(char *));
|
||||
char **d = (char **)calloc(size, sizeof(char *));
|
||||
char *line;
|
||||
while((line=fgetl(fp)) != 0){
|
||||
++count;
|
||||
if(count > size){
|
||||
size = size*2;
|
||||
d = realloc(d, size*sizeof(char *));
|
||||
d = (char **) realloc(d, size*sizeof(char *));
|
||||
}
|
||||
if(0==strcmp(line, "<NEWLINE>")) line = "\n";
|
||||
d[count-1] = line;
|
||||
}
|
||||
fclose(fp);
|
||||
d = realloc(d, count*sizeof(char *));
|
||||
d = (char **) realloc(d, count*sizeof(char *));
|
||||
*read = count;
|
||||
return d;
|
||||
}
|
||||
|
@ -69,8 +69,8 @@ char **read_tokens(char *filename, size_t *read)
|
|||
|
||||
float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps)
|
||||
{
|
||||
float *x = calloc(batch * steps * characters, sizeof(float));
|
||||
float *y = calloc(batch * steps * characters, sizeof(float));
|
||||
float *x = (float*) calloc(batch * steps * characters, sizeof(float));
|
||||
float *y = (float*) calloc(batch * steps * characters, sizeof(float));
|
||||
int i,j;
|
||||
for(i = 0; i < batch; ++i){
|
||||
for(j = 0; j < steps; ++j){
|
||||
|
@ -96,8 +96,8 @@ float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size
|
|||
float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, size_t len, int batch, int steps)
|
||||
{
|
||||
int i,j;
|
||||
float *x = calloc(batch * steps * characters, sizeof(float));
|
||||
float *y = calloc(batch * steps * characters, sizeof(float));
|
||||
float *x = (float*) calloc(batch * steps * characters, sizeof(float));
|
||||
float *y = (float*) calloc(batch * steps * characters, sizeof(float));
|
||||
for(i = 0; i < batch; ++i){
|
||||
int index = rand()%n;
|
||||
//int slen = strlen(source[index]);
|
||||
|
@ -126,8 +126,8 @@ float_pair get_seq2seq_data(char **source, char **dest, int n, int characters, s
|
|||
|
||||
float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps)
|
||||
{
|
||||
float *x = calloc(batch * steps * characters, sizeof(float));
|
||||
float *y = calloc(batch * steps * characters, sizeof(float));
|
||||
float *x = (float*) calloc(batch * steps * characters, sizeof(float));
|
||||
float *y = (float*) calloc(batch * steps * characters, sizeof(float));
|
||||
int i,j;
|
||||
for(i = 0; i < batch; ++i){
|
||||
for(j = 0; j < steps; ++j){
|
||||
|
@ -181,7 +181,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
|
|||
int i = (*net->seen)/net->batch;
|
||||
|
||||
int streams = batch/steps;
|
||||
size_t *offsets = calloc(streams, sizeof(size_t));
|
||||
size_t *offsets = (size_t *)calloc(streams, sizeof(size_t));
|
||||
int j;
|
||||
for(j = 0; j < streams; ++j){
|
||||
offsets[j] = rand_size_t()%size;
|
||||
|
@ -261,7 +261,7 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
|
|||
for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
|
||||
int c = 0;
|
||||
int len = strlen(seed);
|
||||
float *input = calloc(inputs, sizeof(float));
|
||||
float *input = (float*) calloc(inputs, sizeof(float));
|
||||
|
||||
/*
|
||||
fill_cpu(inputs, 0, input, 1);
|
||||
|
@ -314,7 +314,7 @@ void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp,
|
|||
int i, j;
|
||||
for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
|
||||
int c = 0;
|
||||
float *input = calloc(inputs, sizeof(float));
|
||||
float *input = (float*) calloc(inputs, sizeof(float));
|
||||
float *out = 0;
|
||||
|
||||
while(1){
|
||||
|
@ -359,7 +359,7 @@ void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int r
|
|||
int i, j;
|
||||
for(i = 0; i < net->n; ++i) net->layers[i].temperature = temp;
|
||||
int c = 0;
|
||||
float *input = calloc(inputs, sizeof(float));
|
||||
float *input = (float*) calloc(inputs, sizeof(float));
|
||||
float *out = 0;
|
||||
|
||||
while((c = getc(stdin)) != EOF){
|
||||
|
@ -395,7 +395,7 @@ void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed)
|
|||
int words = 1;
|
||||
int c;
|
||||
int len = strlen(seed);
|
||||
float *input = calloc(inputs, sizeof(float));
|
||||
float *input = (float*) calloc(inputs, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
c = seed[i];
|
||||
|
@ -444,7 +444,7 @@ void valid_char_rnn(char *cfgfile, char *weightfile, char *seed)
|
|||
int words = 1;
|
||||
int c;
|
||||
int len = strlen(seed);
|
||||
float *input = calloc(inputs, sizeof(float));
|
||||
float *input = (float*) calloc(inputs, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
c = seed[i];
|
||||
|
@ -480,7 +480,7 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
|
|||
|
||||
int c;
|
||||
int seed_len = strlen(seed);
|
||||
float *input = calloc(inputs, sizeof(float));
|
||||
float *input = (float*) calloc(inputs, sizeof(float));
|
||||
int i;
|
||||
char *line;
|
||||
while((line=fgetl(stdin)) != 0){
|
|
@ -19,10 +19,10 @@ float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int ste
|
|||
image out_im = get_network_image(net);
|
||||
int output_size = out_im.w*out_im.h*out_im.c;
|
||||
printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
|
||||
float *feats = calloc(net.batch*batch*output_size, sizeof(float));
|
||||
float *feats = (float*) calloc(net.batch*batch*output_size, sizeof(float));
|
||||
for(b = 0; b < batch; ++b){
|
||||
int input_size = net.w*net.h*net.c;
|
||||
float *input = calloc(input_size*net.batch, sizeof(float));
|
||||
float *input = (float*) calloc(input_size*net.batch, sizeof(float));
|
||||
char *filename = files[rand()%n];
|
||||
CvCapture *cap = cvCaptureFromFile(filename);
|
||||
int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT);
|
||||
|
@ -183,9 +183,9 @@ void generate_vid_rnn(char *cfgfile, char *weightfile)
|
|||
}
|
||||
for(i = 0; i < 30; ++i){
|
||||
next = network_predict(net, next);
|
||||
image new = save_reconstruction(extractor, &last, next, "new", i);
|
||||
image new_image = save_reconstruction(extractor, &last, next, "new_image", i);
|
||||
free_image(last);
|
||||
last = new;
|
||||
last = new_image;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -203,6 +203,8 @@ void run_vid_rnn(int argc, char **argv)
|
|||
else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights);
|
||||
}
|
||||
#else
|
||||
void run_vid_rnn(int argc, char **argv){}
|
||||
|
||||
void run_vid_rnn(int argc, char **argv) {}
|
||||
|
||||
#endif
|
||||
|
|
@ -2,19 +2,18 @@
|
|||
#include <sys/time.h>
|
||||
#include <assert.h>
|
||||
|
||||
void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display)
|
||||
{
|
||||
void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int display) {
|
||||
int i;
|
||||
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
printf("%s\n", base);
|
||||
printf("%d\n", ngpus);
|
||||
network **nets = calloc(ngpus, sizeof(network*));
|
||||
network **nets = (network **) calloc(ngpus, sizeof(network * ));
|
||||
|
||||
srand(time(0));
|
||||
int seed = rand();
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
srand(seed);
|
||||
#ifdef GPU
|
||||
cuda_set_device(gpus[i]);
|
||||
|
@ -26,7 +25,7 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
network *net = nets[0];
|
||||
image pred = get_network_image(net);
|
||||
|
||||
int div = net->w/pred.w;
|
||||
int div = net->w / pred.w;
|
||||
assert(pred.w * div == net->w);
|
||||
assert(pred.h * div == net->h);
|
||||
|
||||
|
@ -39,7 +38,7 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
char *train_list = option_find_str(options, "train", "data/train.list");
|
||||
|
||||
list *plist = get_paths(train_list);
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
printf("%d\n", plist->size);
|
||||
int N = plist->size;
|
||||
|
||||
|
@ -70,15 +69,15 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
args.d = &buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
int epoch = (*net->seen)/N;
|
||||
while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
|
||||
int epoch = (*net->seen) / N;
|
||||
while (get_current_batch(net) < net->max_batches || net->max_batches == 0) {
|
||||
double time = what_time_is_it_now();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
|
||||
printf("Loaded: %lf seconds\n", what_time_is_it_now() - time);
|
||||
time = what_time_is_it_now();
|
||||
|
||||
float loss = 0;
|
||||
|
@ -91,9 +90,10 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
#else
|
||||
loss = train_network(net, train);
|
||||
#endif
|
||||
if(display){
|
||||
image tr = float_to_image(net->w/div, net->h/div, 80, train.y.vals[net->batch*(net->subdivisions-1)]);
|
||||
image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch*(net->subdivisions-1)]);
|
||||
if (display) {
|
||||
image tr = float_to_image(net->w / div, net->h / div, 80,
|
||||
train.y.vals[net->batch * (net->subdivisions - 1)]);
|
||||
image im = float_to_image(net->w, net->h, net->c, train.X.vals[net->batch * (net->subdivisions - 1)]);
|
||||
image mask = mask_to_rgb(tr);
|
||||
image prmask = mask_to_rgb(pred);
|
||||
show_image(im, "input", 1);
|
||||
|
@ -102,19 +102,21 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
free_image(mask);
|
||||
free_image(prmask);
|
||||
}
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
|
||||
if (avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss * .9 + loss * .1;
|
||||
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net),
|
||||
(float) (*net->seen) / N, loss, avg_loss, get_current_rate(net), what_time_is_it_now() - time,
|
||||
*net->seen);
|
||||
free_data(train);
|
||||
if(*net->seen/N > epoch){
|
||||
epoch = *net->seen/N;
|
||||
if (*net->seen / N > epoch) {
|
||||
epoch = *net->seen / N;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, epoch);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(get_current_batch(net)%100 == 0){
|
||||
if (get_current_batch(net) % 100 == 0) {
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s.backup",backup_directory,base);
|
||||
sprintf(buff, "%s/%s.backup", backup_directory, base);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
}
|
||||
|
@ -123,13 +125,12 @@ void train_segmenter(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
|
|||
save_weights(net, buff);
|
||||
|
||||
free_network(net);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_ptrs((void **) paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename)
|
||||
{
|
||||
void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename) {
|
||||
network *net = load_network(cfg, weights, 0);
|
||||
set_batch_network(net, 1);
|
||||
srand(2222222);
|
||||
|
@ -137,26 +138,26 @@ void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename)
|
|||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
while(1){
|
||||
if(filename){
|
||||
while (1) {
|
||||
if (filename) {
|
||||
strncpy(input, filename, 256);
|
||||
}else{
|
||||
} else {
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
if (!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = letterbox_image(im, net->w, net->h);
|
||||
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
time = clock();
|
||||
float *predictions = network_predict(net, X);
|
||||
image pred = get_network_image(net);
|
||||
image prmask = mask_to_rgb(pred);
|
||||
printf("Predicted: %f\n", predictions[0]);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock() - time));
|
||||
show_image(sized, "orig", 1);
|
||||
show_image(prmask, "pred", 0);
|
||||
free_image(im);
|
||||
|
@ -167,8 +168,7 @@ void predict_segmenter(char *datafile, char *cfg, char *weights, char *filename)
|
|||
}
|
||||
|
||||
|
||||
void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename)
|
||||
{
|
||||
void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, const char *filename) {
|
||||
#ifdef OPENCV
|
||||
printf("Classifier Demo\n");
|
||||
network *net = load_network(cfg, weights, 0);
|
||||
|
@ -210,9 +210,8 @@ void demo_segmenter(char *datacfg, char *cfg, char *weights, int cam_index, cons
|
|||
}
|
||||
|
||||
|
||||
void run_segmenter(int argc, char **argv)
|
||||
{
|
||||
if(argc < 4){
|
||||
void run_segmenter(int argc, char **argv) {
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
@ -221,18 +220,18 @@ void run_segmenter(int argc, char **argv)
|
|||
int *gpus = 0;
|
||||
int gpu = 0;
|
||||
int ngpus = 0;
|
||||
if(gpu_list){
|
||||
if (gpu_list) {
|
||||
printf("%s\n", gpu_list);
|
||||
int len = strlen(gpu_list);
|
||||
ngpus = 1;
|
||||
int i;
|
||||
for(i = 0; i < len; ++i){
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (gpu_list[i] == ',') ++ngpus;
|
||||
}
|
||||
gpus = calloc(ngpus, sizeof(int));
|
||||
for(i = 0; i < ngpus; ++i){
|
||||
gpus = (int *) calloc(ngpus, sizeof(int));
|
||||
for (i = 0; i < ngpus; ++i) {
|
||||
gpus[i] = atoi(gpu_list);
|
||||
gpu_list = strchr(gpu_list, ',')+1;
|
||||
gpu_list = strchr(gpu_list, ',') + 1;
|
||||
}
|
||||
} else {
|
||||
gpu = gpu_index;
|
||||
|
@ -246,10 +245,10 @@ void run_segmenter(int argc, char **argv)
|
|||
char *data = argv[3];
|
||||
char *cfg = argv[4];
|
||||
char *weights = (argc > 5) ? argv[5] : 0;
|
||||
char *filename = (argc > 6) ? argv[6]: 0;
|
||||
if(0==strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename);
|
||||
else if(0==strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display);
|
||||
else if(0==strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename);
|
||||
char *filename = (argc > 6) ? argv[6] : 0;
|
||||
if (0 == strcmp(argv[2], "test")) predict_segmenter(data, cfg, weights, filename);
|
||||
else if (0 == strcmp(argv[2], "train")) train_segmenter(data, cfg, weights, gpus, ngpus, clear, display);
|
||||
else if (0 == strcmp(argv[2], "demo")) demo_segmenter(data, cfg, weights, cam_index, filename);
|
||||
}
|
||||
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
#include "darknet.h"
|
||||
|
||||
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
|
||||
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
|
||||
"diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
|
||||
"tvmonitor"};
|
||||
|
||||
void train_yolo(char *cfgfile, char *weightfile)
|
||||
{
|
||||
void train_yolo(char *cfgfile, char *weightfile) {
|
||||
char *train_images = "/data/voc/train.txt";
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
srand(time(0));
|
||||
|
@ -12,8 +13,8 @@ void train_yolo(char *cfgfile, char *weightfile)
|
|||
float avg_loss = -1;
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
|
||||
int imgs = net->batch*net->subdivisions;
|
||||
int i = *net->seen/imgs;
|
||||
int imgs = net->batch * net->subdivisions;
|
||||
int i = *net->seen / imgs;
|
||||
data train, buffer;
|
||||
|
||||
|
||||
|
@ -25,7 +26,7 @@ void train_yolo(char *cfgfile, char *weightfile)
|
|||
|
||||
list *plist = get_paths(train_images);
|
||||
//int N = plist->size;
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net->w;
|
||||
|
@ -47,22 +48,23 @@ void train_yolo(char *cfgfile, char *weightfile)
|
|||
pthread_t load_thread = load_data_in_thread(args);
|
||||
clock_t time;
|
||||
//while(i*imgs < N*120){
|
||||
while(get_current_batch(net) < net->max_batches){
|
||||
while (get_current_batch(net) < net->max_batches) {
|
||||
i += 1;
|
||||
time=clock();
|
||||
time = clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
load_thread = load_data_in_thread(args);
|
||||
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
printf("Loaded: %lf seconds\n", sec(clock() - time));
|
||||
|
||||
time=clock();
|
||||
time = clock();
|
||||
float loss = train_network(net, train);
|
||||
if (avg_loss < 0) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
avg_loss = avg_loss * .9 + loss * .1;
|
||||
|
||||
printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
|
||||
if(i%1000==0 || (i < 1000 && i%100 == 0)){
|
||||
printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net),
|
||||
sec(clock() - time), i * imgs);
|
||||
if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) {
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
|
||||
save_weights(net, buff);
|
||||
|
@ -74,29 +76,28 @@ void train_yolo(char *cfgfile, char *weightfile)
|
|||
save_weights(net, buff);
|
||||
}
|
||||
|
||||
void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets)
|
||||
{
|
||||
void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets) {
|
||||
int i, j;
|
||||
for(i = 0; i < total; ++i){
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;
|
||||
for (i = 0; i < total; ++i) {
|
||||
float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.;
|
||||
float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.;
|
||||
float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.;
|
||||
float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.;
|
||||
|
||||
if (xmin < 0) xmin = 0;
|
||||
if (ymin < 0) ymin = 0;
|
||||
if (xmax > w) xmax = w;
|
||||
if (ymax > h) ymax = h;
|
||||
|
||||
for(j = 0; j < classes; ++j){
|
||||
if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
|
||||
xmin, ymin, xmax, ymax);
|
||||
for (j = 0; j < classes; ++j) {
|
||||
if (dets[i].prob[j])
|
||||
fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
|
||||
xmin, ymin, xmax, ymax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void validate_yolo(char *cfg, char *weights)
|
||||
{
|
||||
void validate_yolo(char *cfg, char *weights) {
|
||||
network *net = load_network(cfg, weights, 0);
|
||||
set_batch_network(net, 1);
|
||||
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
|
||||
|
@ -106,21 +107,21 @@ void validate_yolo(char *cfg, char *weights)
|
|||
//list *plist = get_paths("data/voc.2007.test");
|
||||
list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt");
|
||||
//list *plist = get_paths("data/voc.2012.test");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
int classes = l.classes;
|
||||
|
||||
int j;
|
||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
||||
for(j = 0; j < classes; ++j){
|
||||
FILE **fps = (FILE **) calloc(classes, sizeof(FILE * ));
|
||||
for (j = 0; j < classes; ++j) {
|
||||
char buff[1024];
|
||||
snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
int i = 0;
|
||||
int t;
|
||||
|
||||
float thresh = .001;
|
||||
|
@ -128,39 +129,39 @@ void validate_yolo(char *cfg, char *weights)
|
|||
float iou_thresh = .5;
|
||||
|
||||
int nthreads = 8;
|
||||
image *val = calloc(nthreads, sizeof(image));
|
||||
image *val_resized = calloc(nthreads, sizeof(image));
|
||||
image *buf = calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
|
||||
image *val = (image *) calloc(nthreads, sizeof(image));
|
||||
image *val_resized = (image *) calloc(nthreads, sizeof(image));
|
||||
image *buf = (image *) calloc(nthreads, sizeof(image));
|
||||
image *buf_resized = (image *) calloc(nthreads, sizeof(image));
|
||||
pthread_t *thr = (pthread_t *) calloc(nthreads, sizeof(pthread_t));
|
||||
|
||||
load_args args = {0};
|
||||
args.w = net->w;
|
||||
args.h = net->h;
|
||||
args.type = IMAGE_DATA;
|
||||
|
||||
for(t = 0; t < nthreads; ++t){
|
||||
args.path = paths[i+t];
|
||||
for (t = 0; t < nthreads; ++t) {
|
||||
args.path = paths[i + t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
time_t start = time(0);
|
||||
for(i = nthreads; i < m+nthreads; i += nthreads){
|
||||
for (i = nthreads; i < m + nthreads; i += nthreads) {
|
||||
fprintf(stderr, "%d\n", i);
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
|
||||
pthread_join(thr[t], 0);
|
||||
val[t] = buf[t];
|
||||
val_resized[t] = buf_resized[t];
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t < m; ++t){
|
||||
args.path = paths[i+t];
|
||||
for (t = 0; t < nthreads && i + t < m; ++t) {
|
||||
args.path = paths[i + t];
|
||||
args.im = &buf[t];
|
||||
args.resized = &buf_resized[t];
|
||||
thr[t] = load_data_in_thread(args);
|
||||
}
|
||||
for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
|
||||
char *path = paths[i+t-nthreads];
|
||||
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
|
||||
char *path = paths[i + t - nthreads];
|
||||
char *id = basecfg(path);
|
||||
float *X = val_resized[t].data;
|
||||
network_predict(net, X);
|
||||
|
@ -168,19 +169,18 @@ void validate_yolo(char *cfg, char *weights)
|
|||
int h = val[t].h;
|
||||
int nboxes = 0;
|
||||
detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes);
|
||||
if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh);
|
||||
print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets);
|
||||
if (nms) do_nms_sort(dets, l.side * l.side * l.n, classes, iou_thresh);
|
||||
print_yolo_detections(fps, id, l.side * l.side * l.n, classes, w, h, dets);
|
||||
free_detections(dets, nboxes);
|
||||
free(id);
|
||||
free_image(val[t]);
|
||||
free_image(val_resized[t]);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
|
||||
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double) (time(0) - start));
|
||||
}
|
||||
|
||||
void validate_yolo_recall(char *cfg, char *weights)
|
||||
{
|
||||
void validate_yolo_recall(char *cfg, char *weights) {
|
||||
network *net = load_network(cfg, weights, 0);
|
||||
set_batch_network(net, 1);
|
||||
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
|
||||
|
@ -188,22 +188,22 @@ void validate_yolo_recall(char *cfg, char *weights)
|
|||
|
||||
char *base = "results/comp4_det_test_";
|
||||
list *plist = get_paths("data/voc.2007.test");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
int classes = l.classes;
|
||||
int side = l.side;
|
||||
|
||||
int j, k;
|
||||
FILE **fps = calloc(classes, sizeof(FILE *));
|
||||
for(j = 0; j < classes; ++j){
|
||||
FILE **fps = (FILE **) calloc(classes, sizeof(FILE * ));
|
||||
for (j = 0; j < classes; ++j) {
|
||||
char buff[1024];
|
||||
snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
|
||||
fps[j] = fopen(buff, "w");
|
||||
}
|
||||
|
||||
int m = plist->size;
|
||||
int i=0;
|
||||
int i = 0;
|
||||
|
||||
float thresh = .001;
|
||||
float iou_thresh = .5;
|
||||
|
@ -214,7 +214,7 @@ void validate_yolo_recall(char *cfg, char *weights)
|
|||
int proposals = 0;
|
||||
float avg_iou = 0;
|
||||
|
||||
for(i = 0; i < m; ++i){
|
||||
for (i = 0; i < m; ++i) {
|
||||
char *path = paths[i];
|
||||
image orig = load_image_color(path, 0, 0);
|
||||
image sized = resize_image(orig, net->w, net->h);
|
||||
|
@ -223,7 +223,7 @@ void validate_yolo_recall(char *cfg, char *weights)
|
|||
|
||||
int nboxes = 0;
|
||||
detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes);
|
||||
if (nms) do_nms_obj(dets, side*side*l.n, 1, nms);
|
||||
if (nms) do_nms_obj(dets, side * side * l.n, 1, nms);
|
||||
|
||||
char labelpath[4096];
|
||||
find_replace(path, "images", "labels", labelpath);
|
||||
|
@ -233,8 +233,8 @@ void validate_yolo_recall(char *cfg, char *weights)
|
|||
|
||||
int num_labels = 0;
|
||||
box_label *truth = read_boxes(labelpath, &num_labels);
|
||||
for(k = 0; k < side*side*l.n; ++k){
|
||||
if(dets[k].objectness > thresh){
|
||||
for (k = 0; k < side * side * l.n; ++k) {
|
||||
if (dets[k].objectness > thresh) {
|
||||
++proposals;
|
||||
}
|
||||
}
|
||||
|
@ -242,19 +242,20 @@ void validate_yolo_recall(char *cfg, char *weights)
|
|||
++total;
|
||||
box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
|
||||
float best_iou = 0;
|
||||
for(k = 0; k < side*side*l.n; ++k){
|
||||
for (k = 0; k < side * side * l.n; ++k) {
|
||||
float iou = box_iou(dets[k].bbox, t);
|
||||
if(dets[k].objectness > thresh && iou > best_iou){
|
||||
if (dets[k].objectness > thresh && iou > best_iou) {
|
||||
best_iou = iou;
|
||||
}
|
||||
}
|
||||
avg_iou += best_iou;
|
||||
if(best_iou > iou_thresh){
|
||||
if (best_iou > iou_thresh) {
|
||||
++correct;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
|
||||
fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total,
|
||||
(float) proposals / (i + 1), avg_iou * 100 / total, 100. * correct / total);
|
||||
free_detections(dets, nboxes);
|
||||
free(id);
|
||||
free_image(orig);
|
||||
|
@ -262,39 +263,38 @@ void validate_yolo_recall(char *cfg, char *weights)
|
|||
}
|
||||
}
|
||||
|
||||
void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
|
||||
{
|
||||
void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) {
|
||||
image **alphabet = load_alphabet();
|
||||
network *net = load_network(cfgfile, weightfile, 0);
|
||||
layer l = net->layers[net->n-1];
|
||||
layer l = net->layers[net->n - 1];
|
||||
set_batch_network(net, 1);
|
||||
srand(2222222);
|
||||
clock_t time;
|
||||
char buff[256];
|
||||
char *input = buff;
|
||||
float nms=.4;
|
||||
while(1){
|
||||
if(filename){
|
||||
float nms = .4;
|
||||
while (1) {
|
||||
if (filename) {
|
||||
strncpy(input, filename, 256);
|
||||
} else {
|
||||
printf("Enter Image Path: ");
|
||||
fflush(stdout);
|
||||
input = fgets(input, 256, stdin);
|
||||
if(!input) return;
|
||||
if (!input) return;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
image im = load_image_color(input,0,0);
|
||||
image im = load_image_color(input, 0, 0);
|
||||
image sized = resize_image(im, net->w, net->h);
|
||||
float *X = sized.data;
|
||||
time=clock();
|
||||
time = clock();
|
||||
network_predict(net, X);
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock() - time));
|
||||
|
||||
int nboxes = 0;
|
||||
detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes);
|
||||
if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms);
|
||||
if (nms) do_nms_sort(dets, l.side * l.side * l.n, l.classes, nms);
|
||||
|
||||
draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20);
|
||||
draw_detections(im, dets, l.side * l.side * l.n, thresh, voc_names, alphabet, 20);
|
||||
save_image(im, "predictions");
|
||||
show_image(im, "predictions", 0);
|
||||
free_detections(dets, nboxes);
|
||||
|
@ -304,13 +304,12 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
|
|||
}
|
||||
}
|
||||
|
||||
void run_yolo(int argc, char **argv)
|
||||
{
|
||||
void run_yolo(int argc, char **argv) {
|
||||
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
|
||||
float thresh = find_float_arg(argc, argv, "-thresh", .2);
|
||||
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||
int frame_skip = find_int_arg(argc, argv, "-s", 0);
|
||||
if(argc < 4){
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
@ -318,10 +317,11 @@ void run_yolo(int argc, char **argv)
|
|||
int avg = find_int_arg(argc, argv, "-avg", 1);
|
||||
char *cfg = argv[3];
|
||||
char *weights = (argc > 4) ? argv[4] : 0;
|
||||
char *filename = (argc > 5) ? argv[5]: 0;
|
||||
if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh);
|
||||
else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0);
|
||||
char *filename = (argc > 5) ? argv[5] : 0;
|
||||
if (0 == strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh);
|
||||
else if (0 == strcmp(argv[2], "train")) train_yolo(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "demo"))
|
||||
demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0, 0, 0, 0);
|
||||
}
|
|
@ -1,37 +1,36 @@
|
|||
#ifndef DARKNET_API
|
||||
#define DARKNET_API
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#ifdef GPU
|
||||
#define BLOCK 512
|
||||
#define BLOCK 512
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
#include "cuda.h"
|
||||
|
||||
#ifdef CUDNN
|
||||
#include "cudnn.h"
|
||||
#endif
|
||||
#ifdef CUDNN
|
||||
#include "cudnn.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SECRET_NUM -1234
|
||||
extern int gpu_index;
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
int classes;
|
||||
char **names;
|
||||
} metadata;
|
||||
|
||||
metadata get_metadata(char *file);
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
int *leaf;
|
||||
int n;
|
||||
int *parent;
|
||||
|
@ -43,17 +42,18 @@ typedef struct{
|
|||
int *group_size;
|
||||
int *group_offset;
|
||||
} tree;
|
||||
|
||||
tree *read_tree(char *filename);
|
||||
|
||||
typedef enum{
|
||||
typedef enum {
|
||||
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
|
||||
} ACTIVATION;
|
||||
|
||||
typedef enum{
|
||||
typedef enum {
|
||||
PNG, BMP, TGA, JPG
|
||||
} IMTYPE;
|
||||
|
||||
typedef enum{
|
||||
typedef enum {
|
||||
MULT, ADD, SUB, DIV
|
||||
} BINARY_ACTIVATION;
|
||||
|
||||
|
@ -90,11 +90,11 @@ typedef enum {
|
|||
BLANK
|
||||
} LAYER_TYPE;
|
||||
|
||||
typedef enum{
|
||||
SSE, MASKED, L1, SEG, SMOOTH,WGAN
|
||||
typedef enum {
|
||||
SSE, MASKED, L1, SEG, SMOOTH, WGAN
|
||||
} COST_TYPE;
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
int batch;
|
||||
float learning_rate;
|
||||
float momentum;
|
||||
|
@ -112,16 +112,23 @@ typedef struct network network;
|
|||
struct layer;
|
||||
typedef struct layer layer;
|
||||
|
||||
struct layer{
|
||||
struct layer {
|
||||
LAYER_TYPE type;
|
||||
ACTIVATION activation;
|
||||
COST_TYPE cost_type;
|
||||
void (*forward) (struct layer, struct network);
|
||||
void (*backward) (struct layer, struct network);
|
||||
void (*update) (struct layer, update_args);
|
||||
void (*forward_gpu) (struct layer, struct network);
|
||||
void (*backward_gpu) (struct layer, struct network);
|
||||
void (*update_gpu) (struct layer, update_args);
|
||||
|
||||
void (*forward)(struct layer, struct network);
|
||||
|
||||
void (*backward)(struct layer, struct network);
|
||||
|
||||
void (*update)(struct layer, update_args);
|
||||
|
||||
void (*forward_gpu)(struct layer, struct network);
|
||||
|
||||
void (*backward_gpu)(struct layer, struct network);
|
||||
|
||||
void (*update_gpu)(struct layer, update_args);
|
||||
|
||||
int batch_normalize;
|
||||
int shortcut;
|
||||
int batch;
|
||||
|
@ -133,7 +140,7 @@ struct layer{
|
|||
int nbiases;
|
||||
int extra;
|
||||
int truths;
|
||||
int h,w,c;
|
||||
int h, w, c;
|
||||
int out_h, out_w, out_c;
|
||||
int n;
|
||||
int max_boxes;
|
||||
|
@ -207,69 +214,69 @@ struct layer{
|
|||
float probability;
|
||||
float scale;
|
||||
|
||||
char * cweights;
|
||||
int * indexes;
|
||||
int * input_layers;
|
||||
int * input_sizes;
|
||||
int * map;
|
||||
int * counts;
|
||||
float ** sums;
|
||||
float * rand;
|
||||
float * cost;
|
||||
float * state;
|
||||
float * prev_state;
|
||||
float * forgot_state;
|
||||
float * forgot_delta;
|
||||
float * state_delta;
|
||||
float * combine_cpu;
|
||||
float * combine_delta_cpu;
|
||||
char *cweights;
|
||||
int *indexes;
|
||||
int *input_layers;
|
||||
int *input_sizes;
|
||||
int *map;
|
||||
int *counts;
|
||||
float **sums;
|
||||
float *rand;
|
||||
float *cost;
|
||||
float *state;
|
||||
float *prev_state;
|
||||
float *forgot_state;
|
||||
float *forgot_delta;
|
||||
float *state_delta;
|
||||
float *combine_cpu;
|
||||
float *combine_delta_cpu;
|
||||
|
||||
float * concat;
|
||||
float * concat_delta;
|
||||
float *concat;
|
||||
float *concat_delta;
|
||||
|
||||
float * binary_weights;
|
||||
float *binary_weights;
|
||||
|
||||
float * biases;
|
||||
float * bias_updates;
|
||||
float *biases;
|
||||
float *bias_updates;
|
||||
|
||||
float * scales;
|
||||
float * scale_updates;
|
||||
float *scales;
|
||||
float *scale_updates;
|
||||
|
||||
float * weights;
|
||||
float * weight_updates;
|
||||
float *weights;
|
||||
float *weight_updates;
|
||||
|
||||
float * delta;
|
||||
float * output;
|
||||
float * loss;
|
||||
float * squared;
|
||||
float * norms;
|
||||
float *delta;
|
||||
float *output;
|
||||
float *loss;
|
||||
float *squared;
|
||||
float *norms;
|
||||
|
||||
float * spatial_mean;
|
||||
float * mean;
|
||||
float * variance;
|
||||
float *spatial_mean;
|
||||
float *mean;
|
||||
float *variance;
|
||||
|
||||
float * mean_delta;
|
||||
float * variance_delta;
|
||||
float *mean_delta;
|
||||
float *variance_delta;
|
||||
|
||||
float * rolling_mean;
|
||||
float * rolling_variance;
|
||||
float *rolling_mean;
|
||||
float *rolling_variance;
|
||||
|
||||
float * x;
|
||||
float * x_norm;
|
||||
float *x;
|
||||
float *x_norm;
|
||||
|
||||
float * m;
|
||||
float * v;
|
||||
|
||||
float * bias_m;
|
||||
float * bias_v;
|
||||
float * scale_m;
|
||||
float * scale_v;
|
||||
float *m;
|
||||
float *v;
|
||||
|
||||
float *bias_m;
|
||||
float *bias_v;
|
||||
float *scale_m;
|
||||
float *scale_v;
|
||||
|
||||
|
||||
float *z_cpu;
|
||||
float *r_cpu;
|
||||
float *h_cpu;
|
||||
float * prev_state_cpu;
|
||||
float *prev_state_cpu;
|
||||
|
||||
float *temp_cpu;
|
||||
float *temp2_cpu;
|
||||
|
@ -284,9 +291,9 @@ struct layer{
|
|||
float *g_cpu;
|
||||
float *o_cpu;
|
||||
float *c_cpu;
|
||||
float *dc_cpu;
|
||||
float *dc_cpu;
|
||||
|
||||
float * binary_input;
|
||||
float *binary_input;
|
||||
|
||||
struct layer *input_layer;
|
||||
struct layer *self_layer;
|
||||
|
@ -311,7 +318,7 @@ struct layer{
|
|||
|
||||
struct layer *input_h_layer;
|
||||
struct layer *state_h_layer;
|
||||
|
||||
|
||||
struct layer *wz;
|
||||
struct layer *uz;
|
||||
struct layer *wr;
|
||||
|
@ -427,7 +434,7 @@ typedef enum {
|
|||
CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
|
||||
} learning_rate_policy;
|
||||
|
||||
typedef struct network{
|
||||
typedef struct network {
|
||||
int n;
|
||||
int batch;
|
||||
size_t *seen;
|
||||
|
@ -448,7 +455,7 @@ typedef struct network{
|
|||
int step;
|
||||
int max_batches;
|
||||
float *scales;
|
||||
int *steps;
|
||||
int *steps;
|
||||
int num_steps;
|
||||
int burn_in;
|
||||
|
||||
|
@ -512,11 +519,11 @@ typedef struct {
|
|||
float *data;
|
||||
} image;
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
float x, y, w, h;
|
||||
} box;
|
||||
|
||||
typedef struct detection{
|
||||
typedef struct detection {
|
||||
box bbox;
|
||||
int classes;
|
||||
float *prob;
|
||||
|
@ -525,13 +532,13 @@ typedef struct detection{
|
|||
int sort_class;
|
||||
} detection;
|
||||
|
||||
typedef struct matrix{
|
||||
typedef struct matrix {
|
||||
int rows, cols;
|
||||
float **vals;
|
||||
} matrix;
|
||||
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
int w, h;
|
||||
matrix X;
|
||||
matrix y;
|
||||
|
@ -541,10 +548,27 @@ typedef struct{
|
|||
} data;
|
||||
|
||||
typedef enum {
|
||||
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA
|
||||
CLASSIFICATION_DATA,
|
||||
DETECTION_DATA,
|
||||
CAPTCHA_DATA,
|
||||
REGION_DATA,
|
||||
IMAGE_DATA,
|
||||
COMPARE_DATA,
|
||||
WRITING_DATA,
|
||||
SWAG_DATA,
|
||||
TAG_DATA,
|
||||
OLD_CLASSIFICATION_DATA,
|
||||
STUDY_DATA,
|
||||
DET_DATA,
|
||||
SUPER_DATA,
|
||||
LETTERBOX_DATA,
|
||||
REGRESSION_DATA,
|
||||
SEGMENTATION_DATA,
|
||||
INSTANCE_DATA,
|
||||
ISEG_DATA
|
||||
} data_type;
|
||||
|
||||
typedef struct load_args{
|
||||
typedef struct load_args {
|
||||
int threads;
|
||||
char **paths;
|
||||
char *path;
|
||||
|
@ -577,52 +601,68 @@ typedef struct load_args{
|
|||
tree *hierarchy;
|
||||
} load_args;
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
int id;
|
||||
float x,y,w,h;
|
||||
float x, y, w, h;
|
||||
float left, right, top, bottom;
|
||||
} box_label;
|
||||
|
||||
|
||||
network *load_network(char *cfg, char *weights, int clear);
|
||||
|
||||
load_args get_base_args(network *net);
|
||||
|
||||
void free_data(data d);
|
||||
|
||||
typedef struct node{
|
||||
typedef struct node {
|
||||
void *val;
|
||||
struct node *next;
|
||||
struct node *prev;
|
||||
} node;
|
||||
|
||||
typedef struct list{
|
||||
typedef struct list {
|
||||
int size;
|
||||
node *front;
|
||||
node *back;
|
||||
} list;
|
||||
|
||||
pthread_t load_data(load_args args);
|
||||
|
||||
list *read_data_cfg(char *filename);
|
||||
|
||||
list *read_cfg(char *filename);
|
||||
|
||||
unsigned char *read_file(char *filename);
|
||||
|
||||
data resize_data(data orig, int w, int h);
|
||||
|
||||
data *tile_data(data orig, int divs, int size);
|
||||
|
||||
data select_data(data *orig, int *inds);
|
||||
|
||||
void forward_network(network *net);
|
||||
|
||||
void backward_network(network *net);
|
||||
|
||||
void update_network(network *net);
|
||||
|
||||
|
||||
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
|
||||
|
||||
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
|
||||
|
||||
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY);
|
||||
|
||||
void scal_cpu(int N, float ALPHA, float *X, int INCX);
|
||||
void fill_cpu(int N, float ALPHA, float * X, int INCX);
|
||||
|
||||
void fill_cpu(int N, float ALPHA, float *X, int INCX);
|
||||
|
||||
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
|
||||
|
||||
void softmax(float *input, int n, float temp, int stride, float *output);
|
||||
|
||||
int best_3d_shift_r(image a, image b, int min, int max);
|
||||
|
||||
#ifdef GPU
|
||||
void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
|
||||
void fill_gpu(int N, float ALPHA, float * X, int INCX);
|
||||
|
@ -644,112 +684,204 @@ float train_networks(network **nets, int n, data d, int interval);
|
|||
void sync_nets(network **nets, int n, int interval);
|
||||
void harmless_update_network_gpu(network *net);
|
||||
#endif
|
||||
|
||||
image get_label(image **characters, char *string, int size);
|
||||
|
||||
void draw_label(image a, int r, int c, image label, const float *rgb);
|
||||
|
||||
void save_image(image im, const char *name);
|
||||
|
||||
void save_image_options(image im, const char *name, IMTYPE f, int quality);
|
||||
|
||||
void get_next_batch(data d, int n, int offset, float *X, float *y);
|
||||
|
||||
void grayscale_image_3c(image im);
|
||||
|
||||
void normalize_image(image p);
|
||||
|
||||
void matrix_to_csv(matrix m);
|
||||
|
||||
float train_network_sgd(network *net, data d, int n);
|
||||
|
||||
void rgbgr_image(image im);
|
||||
|
||||
data copy_data(data d);
|
||||
|
||||
data concat_data(data d1, data d2);
|
||||
|
||||
data load_cifar10_data(char *filename);
|
||||
|
||||
float matrix_topk_accuracy(matrix truth, matrix guess, int k);
|
||||
|
||||
void matrix_add_matrix(matrix from, matrix to);
|
||||
|
||||
void scale_matrix(matrix m, float scale);
|
||||
|
||||
matrix csv_to_matrix(char *filename);
|
||||
|
||||
float *network_accuracies(network *net, data d, int n);
|
||||
|
||||
float train_network_datum(network *net);
|
||||
|
||||
image make_random_image(int w, int h, int c);
|
||||
|
||||
void denormalize_connected_layer(layer l);
|
||||
|
||||
void denormalize_convolutional_layer(layer l);
|
||||
|
||||
void statistics_connected_layer(layer l);
|
||||
|
||||
void rescale_weights(layer l, float scale, float trans);
|
||||
|
||||
void rgbgr_weights(layer l);
|
||||
|
||||
image *get_weights(layer l);
|
||||
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen);
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes,
|
||||
int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen);
|
||||
|
||||
void get_detection_detections(layer l, int w, int h, float thresh, detection *dets);
|
||||
|
||||
char *option_find_str(list *l, char *key, char *def);
|
||||
|
||||
int option_find_int(list *l, char *key, int def);
|
||||
|
||||
int option_find_int_quiet(list *l, char *key, int def);
|
||||
|
||||
network *parse_network_cfg(char *filename);
|
||||
|
||||
void save_weights(network *net, char *filename);
|
||||
|
||||
void load_weights(network *net, char *filename);
|
||||
|
||||
void save_weights_upto(network *net, char *filename, int cutoff);
|
||||
|
||||
void load_weights_upto(network *net, char *filename, int start, int cutoff);
|
||||
|
||||
void zero_objectness(layer l);
|
||||
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets);
|
||||
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets);
|
||||
|
||||
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh,
|
||||
int relative, detection *dets);
|
||||
|
||||
int
|
||||
get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets);
|
||||
|
||||
void free_network(network *net);
|
||||
|
||||
void set_batch_network(network *net, int b);
|
||||
|
||||
void set_temp_network(network *net, float t);
|
||||
|
||||
image load_image(char *filename, int w, int h, int c);
|
||||
|
||||
image load_image_color(char *filename, int w, int h);
|
||||
|
||||
image make_image(int w, int h, int c);
|
||||
|
||||
image resize_image(image im, int w, int h);
|
||||
|
||||
void censor_image(image im, int dx, int dy, int w, int h);
|
||||
|
||||
image letterbox_image(image im, int w, int h);
|
||||
|
||||
image crop_image(image im, int dx, int dy, int w, int h);
|
||||
|
||||
image center_crop_image(image im, int w, int h);
|
||||
|
||||
image resize_min(image im, int min);
|
||||
|
||||
image resize_max(image im, int max);
|
||||
|
||||
image threshold_image(image im, float thresh);
|
||||
|
||||
image mask_to_rgb(image mask);
|
||||
|
||||
int resize_network(network *net, int w, int h);
|
||||
|
||||
void free_matrix(matrix m);
|
||||
|
||||
void test_resize(char *filename);
|
||||
|
||||
int show_image(image p, const char *name, int ms);
|
||||
|
||||
image copy_image(image p);
|
||||
|
||||
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
|
||||
|
||||
float get_current_rate(network *net);
|
||||
|
||||
void composite_3d(char *f1, char *f2, char *out, int delta);
|
||||
|
||||
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
|
||||
|
||||
size_t get_current_batch(network *net);
|
||||
|
||||
void constrain_image(image im);
|
||||
|
||||
image get_network_image_layer(network *net, int i);
|
||||
|
||||
layer get_network_output_layer(network *net);
|
||||
|
||||
void top_predictions(network *net, int n, int *index);
|
||||
|
||||
void flip_image(image a);
|
||||
|
||||
image float_to_image(int w, int h, int c, float *data);
|
||||
|
||||
void ghost_image(image source, image dest, int dx, int dy);
|
||||
|
||||
float network_accuracy(network *net, data d);
|
||||
|
||||
void random_distort_image(image im, float hue, float saturation, float exposure);
|
||||
|
||||
void fill_image(image m, float s);
|
||||
|
||||
image grayscale_image(image im);
|
||||
|
||||
void rotate_image_cw(image im, int times);
|
||||
|
||||
double what_time_is_it_now();
|
||||
|
||||
image rotate_image(image m, float rad);
|
||||
|
||||
void visualize_network(network *net);
|
||||
|
||||
float box_iou(box a, box b);
|
||||
|
||||
data load_all_cifar10();
|
||||
|
||||
box_label *read_boxes(char *filename, int *n);
|
||||
|
||||
box float_to_box(float *f, int stride);
|
||||
|
||||
void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes);
|
||||
|
||||
matrix network_predict_data(network *net, data test);
|
||||
|
||||
image **load_alphabet();
|
||||
|
||||
image get_network_image(network *net);
|
||||
|
||||
float *network_predict(network *net, float *input);
|
||||
|
||||
int network_width(network *net);
|
||||
|
||||
int network_height(network *net);
|
||||
|
||||
float *network_predict_image(network *net, image im);
|
||||
|
||||
void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets);
|
||||
|
||||
detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num);
|
||||
|
||||
void free_detections(detection *dets, int n);
|
||||
|
||||
void reset_network_state(network *net, int b);
|
||||
|
||||
char **get_labels(char *filename);
|
||||
|
||||
void do_nms_obj(detection *dets, int total, int classes, float thresh);
|
||||
|
||||
void do_nms_sort(detection *dets, int total, int classes, float thresh);
|
||||
|
||||
matrix make_matrix(int rows, int cols);
|
||||
|
@ -761,45 +893,77 @@ void make_window(char *name, int w, int h, int fullscreen);
|
|||
#endif
|
||||
|
||||
void free_image(image m);
|
||||
|
||||
float train_network(network *net, data d);
|
||||
|
||||
pthread_t load_data_in_thread(load_args args);
|
||||
|
||||
void load_data_blocking(load_args args);
|
||||
|
||||
list *get_paths(char *filename);
|
||||
|
||||
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride);
|
||||
|
||||
void change_leaves(tree *t, char *leaf_list);
|
||||
|
||||
int find_int_arg(int argc, char **argv, char *arg, int def);
|
||||
|
||||
float find_float_arg(int argc, char **argv, char *arg, float def);
|
||||
int find_arg(int argc, char* argv[], char *arg);
|
||||
|
||||
int find_arg(int argc, char *argv[], char *arg);
|
||||
|
||||
char *find_char_arg(int argc, char **argv, char *arg, char *def);
|
||||
|
||||
char *basecfg(char *cfgfile);
|
||||
|
||||
void find_replace(char *str, char *orig, char *rep, char *output);
|
||||
|
||||
void free_ptrs(void **ptrs, int n);
|
||||
|
||||
char *fgetl(FILE *fp);
|
||||
|
||||
void strip(char *s);
|
||||
|
||||
float sec(clock_t clocks);
|
||||
|
||||
void **list_to_array(list *l);
|
||||
|
||||
void top_k(float *a, int n, int k, int *index);
|
||||
|
||||
int *read_map(char *filename);
|
||||
|
||||
void error(const char *s);
|
||||
|
||||
int max_index(float *a, int n);
|
||||
|
||||
int max_int_index(int *a, int n);
|
||||
|
||||
int sample_array(float *a, int n);
|
||||
|
||||
int *random_index_order(int min, int max);
|
||||
|
||||
void free_list(list *l);
|
||||
|
||||
float mse_array(float *a, int n);
|
||||
|
||||
float variance_array(float *a, int n);
|
||||
|
||||
float mag_array(float *a, int n);
|
||||
|
||||
void scale_array(float *a, int n, float s);
|
||||
|
||||
float mean_array(float *a, int n);
|
||||
|
||||
float sum_array(float *a, int n);
|
||||
|
||||
void normalize_array(float *a, int n);
|
||||
|
||||
int *read_intlist(char *s, int *n, int d);
|
||||
|
||||
size_t rand_size_t();
|
||||
|
||||
float rand_normal();
|
||||
|
||||
float rand_uniform(float min, float max);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,183 @@
|
|||
import os
|
||||
from ctypes import *
|
||||
import math
|
||||
import random
|
||||
|
||||
#
|
||||
# def sample(probs):
|
||||
# s = sum(probs)
|
||||
# probs = [a / s for a in probs]
|
||||
# r = random.uniform(0, 1)
|
||||
# for i in range(len(probs)):
|
||||
# r = r - probs[i]
|
||||
# if r <= 0:
|
||||
# return i
|
||||
# return len(probs) - 1
|
||||
#
|
||||
#
|
||||
# def c_array(ctype, values):
|
||||
# arr = (ctype * len(values))()
|
||||
# arr[:] = values
|
||||
# return arr
|
||||
#
|
||||
#
|
||||
# class BOX(Structure):
|
||||
# _fields_ = [("x", c_float),
|
||||
# ("y", c_float),
|
||||
# ("w", c_float),
|
||||
# ("h", c_float)]
|
||||
#
|
||||
#
|
||||
# class DETECTION(Structure):
|
||||
# _fields_ = [("bbox", BOX),
|
||||
# ("classes", c_int),
|
||||
# ("prob", POINTER(c_float)),
|
||||
# ("mask", POINTER(c_float)),
|
||||
# ("objectness", c_float),
|
||||
# ("sort_class", c_int)]
|
||||
#
|
||||
#
|
||||
# class IMAGE(Structure):
|
||||
# _fields_ = [("w", c_int),
|
||||
# ("h", c_int),
|
||||
# ("c", c_int),
|
||||
# ("data", POINTER(c_float))]
|
||||
#
|
||||
#
|
||||
# class METADATA(Structure):
|
||||
# _fields_ = [("classes", c_int),
|
||||
# ("names", POINTER(c_char_p))]
|
||||
#
|
||||
#
|
||||
# def main():
|
||||
# # lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
|
||||
# lib = CDLL("libdarknet.so", RTLD_GLOBAL)
|
||||
# lib.network_width.argtypes = [c_void_p]
|
||||
# lib.network_width.restype = c_int
|
||||
# lib.network_height.argtypes = [c_void_p]
|
||||
# lib.network_height.restype = c_int
|
||||
#
|
||||
# predict = lib.network_predict
|
||||
# predict.argtypes = [c_void_p, POINTER(c_float)]
|
||||
# predict.restype = POINTER(c_float)
|
||||
#
|
||||
# set_gpu = lib.cuda_set_device
|
||||
# set_gpu.argtypes = [c_int]
|
||||
#
|
||||
# make_image = lib.make_image
|
||||
# make_image.argtypes = [c_int, c_int, c_int]
|
||||
# make_image.restype = IMAGE
|
||||
#
|
||||
# get_network_boxes = lib.get_network_boxes
|
||||
# get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
|
||||
# get_network_boxes.restype = POINTER(DETECTION)
|
||||
#
|
||||
# make_network_boxes = lib.make_network_boxes
|
||||
# make_network_boxes.argtypes = [c_void_p]
|
||||
# make_network_boxes.restype = POINTER(DETECTION)
|
||||
#
|
||||
# free_detections = lib.free_detections
|
||||
# free_detections.argtypes = [POINTER(DETECTION), c_int]
|
||||
#
|
||||
# free_ptrs = lib.free_ptrs
|
||||
# free_ptrs.argtypes = [POINTER(c_void_p), c_int]
|
||||
#
|
||||
# network_predict = lib.network_predict
|
||||
# network_predict.argtypes = [c_void_p, POINTER(c_float)]
|
||||
#
|
||||
# reset_rnn = lib.reset_rnn
|
||||
# reset_rnn.argtypes = [c_void_p]
|
||||
#
|
||||
# load_net = lib.load_network
|
||||
# load_net.argtypes = [c_char_p, c_char_p, c_int]
|
||||
# load_net.restype = c_void_p
|
||||
#
|
||||
# do_nms_obj = lib.do_nms_obj
|
||||
# do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
|
||||
#
|
||||
# do_nms_sort = lib.do_nms_sort
|
||||
# do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
|
||||
#
|
||||
# free_image = lib.free_image
|
||||
# free_image.argtypes = [IMAGE]
|
||||
#
|
||||
# letterbox_image = lib.letterbox_image
|
||||
# letterbox_image.argtypes = [IMAGE, c_int, c_int]
|
||||
# letterbox_image.restype = IMAGE
|
||||
#
|
||||
# load_meta = lib.get_metadata
|
||||
# lib.get_metadata.argtypes = [c_char_p]
|
||||
# lib.get_metadata.restype = METADATA
|
||||
#
|
||||
# load_image = lib.load_image_color
|
||||
# load_image.argtypes = [c_char_p, c_int, c_int]
|
||||
# load_image.restype = IMAGE
|
||||
#
|
||||
# rgbgr_image = lib.rgbgr_image
|
||||
# rgbgr_image.argtypes = [IMAGE]
|
||||
#
|
||||
# predict_image = lib.network_predict_image
|
||||
# predict_image.argtypes = [c_void_p, IMAGE]
|
||||
# predict_image.restype = POINTER(c_float)
|
||||
#
|
||||
#
|
||||
# def classify(net, meta, im):
|
||||
# out = predict_image(net, im)
|
||||
# res = []
|
||||
# for i in range(meta.classes):
|
||||
# res.append((meta.names[i], out[i]))
|
||||
# res = sorted(res, key=lambda x: -x[1])
|
||||
# return res
|
||||
#
|
||||
#
|
||||
# def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
|
||||
# im = load_image(image, 0, 0)
|
||||
# num = c_int(0)
|
||||
# pnum = pointer(num)
|
||||
# predict_image(net, im)
|
||||
# dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
|
||||
# num = pnum[0]
|
||||
# if (nms): do_nms_obj(dets, num, meta.classes, nms);
|
||||
#
|
||||
# res = []
|
||||
# for j in range(num):
|
||||
# for i in range(meta.classes):
|
||||
# if dets[j].prob[i] > 0:
|
||||
# b = dets[j].bbox
|
||||
# res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h)))
|
||||
# res = sorted(res, key=lambda x: -x[1])
|
||||
# free_image(im)
|
||||
# free_detections(dets, num)
|
||||
# return res
|
||||
#
|
||||
|
||||
# def main():
|
||||
# import os
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# get exe file path, need to copy dynamic library to the same directory before.
|
||||
# absfilepath = os.path.dirname(__file__)
|
||||
# dylib_path = os.path.join(absfilepath, "libdarknet.so")
|
||||
#
|
||||
lib = CDLL("libdarknet.so", RTLD_GLOBAL)
|
||||
lib._Z13network_widthP7network.argtypes = [c_void_p]
|
||||
lib._Z13network_widthP7network.restype = c_int
|
||||
lib._Z13network_widthP7network.argtypes = [c_void_p]
|
||||
lib._Z13network_widthP7network.restype = c_int
|
||||
|
||||
# predict = lib.network_predict
|
||||
predict = lib._Z26network_predict_data_multiP7network4datai
|
||||
predict.argtypes = [c_void_p, POINTER(c_float)]
|
||||
predict.restype = POINTER(c_float)
|
||||
print("predict ", predict)
|
||||
|
||||
# # net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
|
||||
# # im = load_image("data/wolf.jpg", 0, 0)
|
||||
# # meta = load_meta("cfg/imagenet1k.data")
|
||||
# # r = classify(net, meta, im)
|
||||
# # print r[:10]
|
||||
# net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0)
|
||||
# meta = load_meta("cfg/coco.data")
|
||||
# r = detect(net, meta, "data/dog.jpg")
|
||||
# print(r)
|
|
@ -1,37 +0,0 @@
|
|||
from darknet import *
|
||||
|
||||
def predict_tactic(net, s):
|
||||
prob = 0
|
||||
d = c_array(c_float, [0.0]*256)
|
||||
tac = ''
|
||||
if not len(s):
|
||||
s = '\n'
|
||||
for c in s[:-1]:
|
||||
d[ord(c)] = 1
|
||||
pred = predict(net, d)
|
||||
d[ord(c)] = 0
|
||||
c = s[-1]
|
||||
while 1:
|
||||
d[ord(c)] = 1
|
||||
pred = predict(net, d)
|
||||
d[ord(c)] = 0
|
||||
pred = [pred[i] for i in range(256)]
|
||||
ind = sample(pred)
|
||||
c = chr(ind)
|
||||
prob += math.log(pred[ind])
|
||||
if len(tac) and tac[-1] == '.':
|
||||
break
|
||||
tac = tac + c
|
||||
return (tac, prob)
|
||||
|
||||
def predict_tactics(net, s, n):
|
||||
tacs = []
|
||||
for i in range(n):
|
||||
reset_rnn(net)
|
||||
tacs.append(predict_tactic(net, s))
|
||||
tacs = sorted(tacs, key=lambda x: -x[1])
|
||||
return tacs
|
||||
|
||||
net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0)
|
||||
t = predict_tactics(net, "+++++\n", 10)
|
||||
print t
|
|
@ -4,28 +4,30 @@ import os
|
|||
from os import listdir, getcwd
|
||||
from os.path import join
|
||||
|
||||
sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
|
||||
sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
|
||||
|
||||
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
|
||||
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
|
||||
"sofa", "train", "tvmonitor"]
|
||||
|
||||
|
||||
def convert(size, box):
|
||||
dw = 1./(size[0])
|
||||
dh = 1./(size[1])
|
||||
x = (box[0] + box[1])/2.0 - 1
|
||||
y = (box[2] + box[3])/2.0 - 1
|
||||
dw = 1. / (size[0])
|
||||
dh = 1. / (size[1])
|
||||
x = (box[0] + box[1]) / 2.0 - 1
|
||||
y = (box[2] + box[3]) / 2.0 - 1
|
||||
w = box[1] - box[0]
|
||||
h = box[3] - box[2]
|
||||
x = x*dw
|
||||
w = w*dw
|
||||
y = y*dh
|
||||
h = h*dh
|
||||
return (x,y,w,h)
|
||||
x = x * dw
|
||||
w = w * dw
|
||||
y = y * dh
|
||||
h = h * dh
|
||||
return (x, y, w, h)
|
||||
|
||||
|
||||
def convert_annotation(year, image_id):
|
||||
in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
|
||||
out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
|
||||
tree=ET.parse(in_file)
|
||||
in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
|
||||
out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
|
||||
tree = ET.parse(in_file)
|
||||
root = tree.getroot()
|
||||
size = root.find('size')
|
||||
w = int(size.find('width').text)
|
||||
|
@ -34,26 +36,26 @@ def convert_annotation(year, image_id):
|
|||
for obj in root.iter('object'):
|
||||
difficult = obj.find('difficult').text
|
||||
cls = obj.find('name').text
|
||||
if cls not in classes or int(difficult)==1:
|
||||
if cls not in classes or int(difficult) == 1:
|
||||
continue
|
||||
cls_id = classes.index(cls)
|
||||
xmlbox = obj.find('bndbox')
|
||||
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
|
||||
bb = convert((w,h), b)
|
||||
bb = convert((w, h), b)
|
||||
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
|
||||
|
||||
|
||||
wd = getcwd()
|
||||
|
||||
for year, image_set in sets:
|
||||
if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
|
||||
os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
|
||||
image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
|
||||
list_file = open('%s_%s.txt'%(year, image_set), 'w')
|
||||
if not os.path.exists('VOCdevkit/VOC%s/labels/' % (year)):
|
||||
os.makedirs('VOCdevkit/VOC%s/labels/' % (year))
|
||||
image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
|
||||
list_file = open('%s_%s.txt' % (year, image_set), 'w')
|
||||
for image_id in image_ids:
|
||||
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
|
||||
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
|
||||
convert_annotation(year, image_id)
|
||||
list_file.close()
|
||||
|
||||
os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
|
||||
os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
|
||||
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
import xml.etree.ElementTree as ET
|
||||
import pickle
|
||||
import os
|
||||
from os import listdir, getcwd
|
||||
from os.path import join
|
||||
|
||||
sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
|
||||
|
||||
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
|
||||
"sofa", "train", "tvmonitor"]
|
||||
|
||||
|
||||
def convert(size, box):
|
||||
dw = 1. / (size[0])
|
||||
dh = 1. / (size[1])
|
||||
x = (box[0] + box[1]) / 2.0 - 1
|
||||
y = (box[2] + box[3]) / 2.0 - 1
|
||||
w = box[1] - box[0]
|
||||
h = box[3] - box[2]
|
||||
x = x * dw
|
||||
w = w * dw
|
||||
y = y * dh
|
||||
h = h * dh
|
||||
return (x, y, w, h)
|
||||
|
||||
|
||||
def convert_annotation(year, image_id):
|
||||
in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
|
||||
out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
|
||||
tree = ET.parse(in_file)
|
||||
root = tree.getroot()
|
||||
size = root.find('size')
|
||||
w = int(size.find('width').text)
|
||||
h = int(size.find('height').text)
|
||||
|
||||
for obj in root.iter('object'):
|
||||
difficult = obj.find('difficult').text
|
||||
cls = obj.find('name').text
|
||||
if cls not in classes or int(difficult) == 1:
|
||||
continue
|
||||
cls_id = classes.index(cls)
|
||||
xmlbox = obj.find('bndbox')
|
||||
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
|
||||
bb = convert((w, h), b)
|
||||
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
|
||||
|
||||
|
||||
wd = getcwd()
|
||||
|
||||
for year, image_set in sets:
|
||||
if not os.path.exists('VOCdevkit/VOC%s/labels/' % (year)):
|
||||
os.makedirs('VOCdevkit/VOC%s/labels/' % (year))
|
||||
image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
|
||||
list_file = open('%s_%s.txt' % (year, image_set), 'w')
|
||||
for image_id in image_ids:
|
||||
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
|
||||
convert_annotation(year, image_id)
|
||||
list_file.close()
|
||||
|
||||
os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
|
||||
os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
|
|
@ -1,84 +1,98 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
extern "C" {
|
||||
#include "activations.h"
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
|
||||
__device__ float lhtan_activate_kernel(float x)
|
||||
{
|
||||
if(x < 0) return .001f*x;
|
||||
if(x > 1) return .001f*(x-1.f) + 1.f;
|
||||
__device__ float lhtan_activate_kernel(float x) {
|
||||
if (x < 0) return .001f * x;
|
||||
if (x > 1) return .001f * (x - 1.f) + 1.f;
|
||||
return x;
|
||||
}
|
||||
__device__ float lhtan_gradient_kernel(float x)
|
||||
{
|
||||
if(x > 0 && x < 1) return 1;
|
||||
|
||||
__device__ float lhtan_gradient_kernel(float x) {
|
||||
if (x > 0 && x < 1) return 1;
|
||||
return .001;
|
||||
}
|
||||
|
||||
__device__ float hardtan_activate_kernel(float x)
|
||||
{
|
||||
__device__ float hardtan_activate_kernel(float x) {
|
||||
if (x < -1) return -1;
|
||||
if (x > 1) return 1;
|
||||
return x;
|
||||
}
|
||||
__device__ float linear_activate_kernel(float x){return x;}
|
||||
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
|
||||
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
|
||||
__device__ float relu_activate_kernel(float x){return x*(x>0);}
|
||||
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
|
||||
__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);}
|
||||
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
|
||||
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
|
||||
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
|
||||
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
|
||||
__device__ float plse_activate_kernel(float x)
|
||||
{
|
||||
if(x < -4) return .01f * (x + 4);
|
||||
if(x > 4) return .01f * (x - 4) + 1;
|
||||
return .125f*x + .5f;
|
||||
}
|
||||
__device__ float stair_activate_kernel(float x)
|
||||
{
|
||||
int n = floorf(x);
|
||||
if (n%2 == 0) return floorf(x/2);
|
||||
else return (x - n) + floorf(x/2);
|
||||
}
|
||||
|
||||
|
||||
__device__ float hardtan_gradient_kernel(float x)
|
||||
{
|
||||
__device__ float linear_activate_kernel(float x) { return x; }
|
||||
|
||||
__device__ float logistic_activate_kernel(float x) { return 1.f / (1.f + expf(-x)); }
|
||||
|
||||
__device__ float loggy_activate_kernel(float x) { return 2.f / (1.f + expf(-x)) - 1; }
|
||||
|
||||
__device__ float relu_activate_kernel(float x) { return x * (x > 0); }
|
||||
|
||||
__device__ float elu_activate_kernel(float x) { return (x >= 0) * x + (x < 0) * (expf(x) - 1); }
|
||||
|
||||
__device__ float selu_activate_kernel(float x) {
|
||||
return (x >= 0) * 1.0507f * x + (x < 0) * 1.0507f * 1.6732f * (expf(x) - 1);
|
||||
}
|
||||
|
||||
__device__ float relie_activate_kernel(float x) { return (x > 0) ? x : .01f * x; }
|
||||
|
||||
__device__ float ramp_activate_kernel(float x) { return x * (x > 0) + .1f * x; }
|
||||
|
||||
__device__ float leaky_activate_kernel(float x) { return (x > 0) ? x : .1f * x; }
|
||||
|
||||
__device__ float tanh_activate_kernel(float x) { return (2.f / (1 + expf(-2 * x)) - 1); }
|
||||
|
||||
__device__ float plse_activate_kernel(float x) {
|
||||
if (x < -4) return .01f * (x + 4);
|
||||
if (x > 4) return .01f * (x - 4) + 1;
|
||||
return .125f * x + .5f;
|
||||
}
|
||||
|
||||
__device__ float stair_activate_kernel(float x) {
|
||||
int n = floorf(x);
|
||||
if (n % 2 == 0) return floorf(x / 2);
|
||||
else return (x - n) + floorf(x / 2);
|
||||
}
|
||||
|
||||
|
||||
__device__ float hardtan_gradient_kernel(float x) {
|
||||
if (x > -1 && x < 1) return 1;
|
||||
return 0;
|
||||
}
|
||||
__device__ float linear_gradient_kernel(float x){return 1;}
|
||||
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
|
||||
__device__ float loggy_gradient_kernel(float x)
|
||||
{
|
||||
float y = (x+1)/2;
|
||||
return 2*(1-y)*y;
|
||||
|
||||
__device__ float linear_gradient_kernel(float x) { return 1; }
|
||||
|
||||
__device__ float logistic_gradient_kernel(float x) { return (1 - x) * x; }
|
||||
|
||||
__device__ float loggy_gradient_kernel(float x) {
|
||||
float y = (x + 1) / 2;
|
||||
return 2 * (1 - y) * y;
|
||||
}
|
||||
__device__ float relu_gradient_kernel(float x){return (x>0);}
|
||||
__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
|
||||
__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);}
|
||||
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
|
||||
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
|
||||
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
|
||||
__device__ float tanh_gradient_kernel(float x){return 1-x*x;}
|
||||
__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
|
||||
__device__ float stair_gradient_kernel(float x)
|
||||
{
|
||||
|
||||
__device__ float relu_gradient_kernel(float x) { return (x > 0); }
|
||||
|
||||
__device__ float elu_gradient_kernel(float x) { return (x >= 0) + (x < 0) * (x + 1); }
|
||||
|
||||
__device__ float selu_gradient_kernel(float x) { return (x >= 0) * 1.0507 + (x < 0) * (x + 1.0507 * 1.6732); }
|
||||
|
||||
__device__ float relie_gradient_kernel(float x) { return (x > 0) ? 1 : .01f; }
|
||||
|
||||
__device__ float ramp_gradient_kernel(float x) { return (x > 0) + .1f; }
|
||||
|
||||
__device__ float leaky_gradient_kernel(float x) { return (x > 0) ? 1 : .1f; }
|
||||
|
||||
__device__ float tanh_gradient_kernel(float x) { return 1 - x * x; }
|
||||
|
||||
__device__ float plse_gradient_kernel(float x) { return (x < 0 || x > 1) ? .01f : .125f; }
|
||||
|
||||
__device__ float stair_gradient_kernel(float x) {
|
||||
if (floorf(x) == x) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__device__ float activate_kernel(float x, ACTIVATION a)
|
||||
{
|
||||
switch(a){
|
||||
__device__ float activate_kernel(float x, ACTIVATION a) {
|
||||
switch (a) {
|
||||
case LINEAR:
|
||||
return linear_activate_kernel(x);
|
||||
case LOGISTIC:
|
||||
|
@ -111,9 +125,8 @@ __device__ float activate_kernel(float x, ACTIVATION a)
|
|||
return 0;
|
||||
}
|
||||
|
||||
__device__ float gradient_kernel(float x, ACTIVATION a)
|
||||
{
|
||||
switch(a){
|
||||
__device__ float gradient_kernel(float x, ACTIVATION a) {
|
||||
switch (a) {
|
||||
case LINEAR:
|
||||
return linear_gradient_kernel(x);
|
||||
case LOGISTIC:
|
||||
|
@ -146,61 +159,54 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
|
|||
return 0;
|
||||
}
|
||||
|
||||
__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
int i = id % s;
|
||||
int b = id / s;
|
||||
float x1 = x[b*s + i];
|
||||
float x2 = x[b*s + s/2 + i];
|
||||
if(id < n) {
|
||||
float x1 = x[b * s + i];
|
||||
float x2 = x[b * s + s / 2 + i];
|
||||
if (id < n) {
|
||||
float de = dy[id];
|
||||
dx[b*s + i] = x2*de;
|
||||
dx[b*s + s/2 + i] = x1*de;
|
||||
dx[b * s + i] = x2 * de;
|
||||
dx[b * s + s / 2 + i] = x1 * de;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
|
||||
{
|
||||
binary_gradient_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, dx, n/2, size, a, y);
|
||||
check_error(cudaPeekAtLastError());
|
||||
void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) {
|
||||
binary_gradient_array_kernel<<<cuda_gridsize(n / 2), BLOCK>>>(x, dx, n / 2, size, a, y);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
|
||||
__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
int i = id % s;
|
||||
int b = id / s;
|
||||
float x1 = x[b*s + i];
|
||||
float x2 = x[b*s + s/2 + i];
|
||||
if(id < n) y[id] = x1*x2;
|
||||
float x1 = x[b * s + i];
|
||||
float x2 = x[b * s + s / 2 + i];
|
||||
if (id < n) y[id] = x1 * x2;
|
||||
}
|
||||
|
||||
extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
|
||||
{
|
||||
binary_activate_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, n/2, size, a, y);
|
||||
check_error(cudaPeekAtLastError());
|
||||
void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) {
|
||||
binary_activate_array_kernel<<<cuda_gridsize(n / 2), BLOCK>>>(x, n / 2, size, a, y);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(i < n) x[i] = activate_kernel(x[i], a);
|
||||
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) {
|
||||
int i = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (i < n) x[i] = activate_kernel(x[i], a);
|
||||
}
|
||||
|
||||
__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(i < n) delta[i] *= gradient_kernel(x[i], a);
|
||||
__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) {
|
||||
int i = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (i < n) delta[i] *= gradient_kernel(x[i], a);
|
||||
}
|
||||
|
||||
extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a)
|
||||
{
|
||||
void activate_array_gpu(float *x, int n, ACTIVATION a) {
|
||||
activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
|
||||
check_error(cudaPeekAtLastError());
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
|
||||
{
|
||||
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta) {
|
||||
gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
|
||||
check_error(cudaPeekAtLastError());
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#include "activation_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
|
||||
|
@ -9,17 +8,20 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
|
||||
{
|
||||
layer l = {0};
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
layer make_activation_layer(int batch, int inputs, ACTIVATION activation) {
|
||||
layer l = {(LAYER_TYPE)0};
|
||||
l.type = ACTIVE;
|
||||
|
||||
l.inputs = inputs;
|
||||
l.outputs = inputs;
|
||||
l.batch=batch;
|
||||
l.batch = batch;
|
||||
|
||||
l.output = calloc(batch*inputs, sizeof(float*));
|
||||
l.delta = calloc(batch*inputs, sizeof(float*));
|
||||
l.output = (float*) calloc(batch * inputs, sizeof(float *));
|
||||
l.delta = (float*) calloc(batch * inputs, sizeof(float *));
|
||||
|
||||
l.forward = forward_activation_layer;
|
||||
l.backward = backward_activation_layer;
|
||||
|
@ -35,16 +37,14 @@ layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
|
|||
return l;
|
||||
}
|
||||
|
||||
void forward_activation_layer(layer l, network net)
|
||||
{
|
||||
copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
|
||||
activate_array(l.output, l.outputs*l.batch, l.activation);
|
||||
void forward_activation_layer(layer l, network net) {
|
||||
copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1);
|
||||
activate_array(l.output, l.outputs * l.batch, l.activation);
|
||||
}
|
||||
|
||||
void backward_activation_layer(layer l, network net)
|
||||
{
|
||||
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
|
||||
copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
|
||||
void backward_activation_layer(layer l, network net) {
|
||||
gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta);
|
||||
copy_cpu(l.outputs * l.batch, l.delta, 1, net.delta, 1);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
|
@ -8,6 +8,7 @@
|
|||
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
|
||||
|
||||
void forward_activation_layer(layer l, network net);
|
||||
|
||||
void backward_activation_layer(layer l, network net);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -5,9 +5,8 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
char *get_activation_string(ACTIVATION a)
|
||||
{
|
||||
switch(a){
|
||||
char *get_activation_string(ACTIVATION a) {
|
||||
switch (a) {
|
||||
case LOGISTIC:
|
||||
return "logistic";
|
||||
case LOGGY:
|
||||
|
@ -42,29 +41,27 @@ char *get_activation_string(ACTIVATION a)
|
|||
return "relu";
|
||||
}
|
||||
|
||||
ACTIVATION get_activation(char *s)
|
||||
{
|
||||
if (strcmp(s, "logistic")==0) return LOGISTIC;
|
||||
if (strcmp(s, "loggy")==0) return LOGGY;
|
||||
if (strcmp(s, "relu")==0) return RELU;
|
||||
if (strcmp(s, "elu")==0) return ELU;
|
||||
if (strcmp(s, "selu")==0) return SELU;
|
||||
if (strcmp(s, "relie")==0) return RELIE;
|
||||
if (strcmp(s, "plse")==0) return PLSE;
|
||||
if (strcmp(s, "hardtan")==0) return HARDTAN;
|
||||
if (strcmp(s, "lhtan")==0) return LHTAN;
|
||||
if (strcmp(s, "linear")==0) return LINEAR;
|
||||
if (strcmp(s, "ramp")==0) return RAMP;
|
||||
if (strcmp(s, "leaky")==0) return LEAKY;
|
||||
if (strcmp(s, "tanh")==0) return TANH;
|
||||
if (strcmp(s, "stair")==0) return STAIR;
|
||||
ACTIVATION get_activation(char *s) {
|
||||
if (strcmp(s, "logistic") == 0) return LOGISTIC;
|
||||
if (strcmp(s, "loggy") == 0) return LOGGY;
|
||||
if (strcmp(s, "relu") == 0) return RELU;
|
||||
if (strcmp(s, "elu") == 0) return ELU;
|
||||
if (strcmp(s, "selu") == 0) return SELU;
|
||||
if (strcmp(s, "relie") == 0) return RELIE;
|
||||
if (strcmp(s, "plse") == 0) return PLSE;
|
||||
if (strcmp(s, "hardtan") == 0) return HARDTAN;
|
||||
if (strcmp(s, "lhtan") == 0) return LHTAN;
|
||||
if (strcmp(s, "linear") == 0) return LINEAR;
|
||||
if (strcmp(s, "ramp") == 0) return RAMP;
|
||||
if (strcmp(s, "leaky") == 0) return LEAKY;
|
||||
if (strcmp(s, "tanh") == 0) return TANH;
|
||||
if (strcmp(s, "stair") == 0) return STAIR;
|
||||
fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
|
||||
return RELU;
|
||||
}
|
||||
|
||||
float activate(float x, ACTIVATION a)
|
||||
{
|
||||
switch(a){
|
||||
float activate(float x, ACTIVATION a) {
|
||||
switch (a) {
|
||||
case LINEAR:
|
||||
return linear_activate(x);
|
||||
case LOGISTIC:
|
||||
|
@ -97,17 +94,15 @@ float activate(float x, ACTIVATION a)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void activate_array(float *x, const int n, const ACTIVATION a)
|
||||
{
|
||||
void activate_array(float *x, const int n, const ACTIVATION a) {
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
for (i = 0; i < n; ++i) {
|
||||
x[i] = activate(x[i], a);
|
||||
}
|
||||
}
|
||||
|
||||
float gradient(float x, ACTIVATION a)
|
||||
{
|
||||
switch(a){
|
||||
float gradient(float x, ACTIVATION a) {
|
||||
switch (a) {
|
||||
case LINEAR:
|
||||
return linear_gradient(x);
|
||||
case LOGISTIC:
|
||||
|
@ -140,10 +135,9 @@ float gradient(float x, ACTIVATION a)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
|
||||
{
|
||||
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) {
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
for (i = 0; i < n; ++i) {
|
||||
delta[i] *= gradient(x[i], a);
|
||||
}
|
||||
}
|
|
@ -1,87 +1,111 @@
|
|||
#ifndef ACTIVATIONS_H
|
||||
#define ACTIVATIONS_H
|
||||
|
||||
#include "darknet.h"
|
||||
#include "cuda.h"
|
||||
#include "math.h"
|
||||
|
||||
ACTIVATION get_activation(char *s);
|
||||
|
||||
char *get_activation_string(ACTIVATION a);
|
||||
|
||||
float activate(float x, ACTIVATION a);
|
||||
|
||||
float gradient(float x, ACTIVATION a);
|
||||
|
||||
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
|
||||
|
||||
void activate_array(float *x, const int n, const ACTIVATION a);
|
||||
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
void activate_array_gpu(float *x, int n, ACTIVATION a);
|
||||
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta);
|
||||
#endif
|
||||
|
||||
static inline float stair_activate(float x)
|
||||
{
|
||||
static inline float stair_activate(float x) {
|
||||
int n = floor(x);
|
||||
if (n%2 == 0) return floor(x/2.);
|
||||
else return (x - n) + floor(x/2.);
|
||||
if (n % 2 == 0) return floor(x / 2.);
|
||||
else return (x - n) + floor(x / 2.);
|
||||
}
|
||||
static inline float hardtan_activate(float x)
|
||||
{
|
||||
|
||||
static inline float hardtan_activate(float x) {
|
||||
if (x < -1) return -1;
|
||||
if (x > 1) return 1;
|
||||
return x;
|
||||
}
|
||||
static inline float linear_activate(float x){return x;}
|
||||
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
|
||||
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
|
||||
static inline float relu_activate(float x){return x*(x>0);}
|
||||
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
|
||||
static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);}
|
||||
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
|
||||
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
|
||||
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
|
||||
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
|
||||
static inline float plse_activate(float x)
|
||||
{
|
||||
if(x < -4) return .01 * (x + 4);
|
||||
if(x > 4) return .01 * (x - 4) + 1;
|
||||
return .125*x + .5;
|
||||
|
||||
static inline float linear_activate(float x) { return x; }
|
||||
|
||||
static inline float logistic_activate(float x) { return 1. / (1. + exp(-x)); }
|
||||
|
||||
static inline float loggy_activate(float x) { return 2. / (1. + exp(-x)) - 1; }
|
||||
|
||||
static inline float relu_activate(float x) { return x * (x > 0); }
|
||||
|
||||
static inline float elu_activate(float x) { return (x >= 0) * x + (x < 0) * (exp(x) - 1); }
|
||||
|
||||
static inline float selu_activate(float x) { return (x >= 0) * 1.0507 * x + (x < 0) * 1.0507 * 1.6732 * (exp(x) - 1); }
|
||||
|
||||
static inline float relie_activate(float x) { return (x > 0) ? x : .01 * x; }
|
||||
|
||||
static inline float ramp_activate(float x) { return x * (x > 0) + .1 * x; }
|
||||
|
||||
static inline float leaky_activate(float x) { return (x > 0) ? x : .1 * x; }
|
||||
|
||||
static inline float tanh_activate(float x) { return (exp(2 * x) - 1) / (exp(2 * x) + 1); }
|
||||
|
||||
static inline float plse_activate(float x) {
|
||||
if (x < -4) return .01 * (x + 4);
|
||||
if (x > 4) return .01 * (x - 4) + 1;
|
||||
return .125 * x + .5;
|
||||
}
|
||||
|
||||
static inline float lhtan_activate(float x)
|
||||
{
|
||||
if(x < 0) return .001*x;
|
||||
if(x > 1) return .001*(x-1) + 1;
|
||||
static inline float lhtan_activate(float x) {
|
||||
if (x < 0) return .001 * x;
|
||||
if (x > 1) return .001 * (x - 1) + 1;
|
||||
return x;
|
||||
}
|
||||
static inline float lhtan_gradient(float x)
|
||||
{
|
||||
if(x > 0 && x < 1) return 1;
|
||||
|
||||
static inline float lhtan_gradient(float x) {
|
||||
if (x > 0 && x < 1) return 1;
|
||||
return .001;
|
||||
}
|
||||
|
||||
static inline float hardtan_gradient(float x)
|
||||
{
|
||||
static inline float hardtan_gradient(float x) {
|
||||
if (x > -1 && x < 1) return 1;
|
||||
return 0;
|
||||
}
|
||||
static inline float linear_gradient(float x){return 1;}
|
||||
static inline float logistic_gradient(float x){return (1-x)*x;}
|
||||
static inline float loggy_gradient(float x)
|
||||
{
|
||||
float y = (x+1.)/2.;
|
||||
return 2*(1-y)*y;
|
||||
|
||||
static inline float linear_gradient(float x) { return 1; }
|
||||
|
||||
static inline float logistic_gradient(float x) { return (1 - x) * x; }
|
||||
|
||||
static inline float loggy_gradient(float x) {
|
||||
float y = (x + 1.) / 2.;
|
||||
return 2 * (1 - y) * y;
|
||||
}
|
||||
static inline float stair_gradient(float x)
|
||||
{
|
||||
|
||||
static inline float stair_gradient(float x) {
|
||||
if (floor(x) == x) return 0;
|
||||
return 1;
|
||||
}
|
||||
static inline float relu_gradient(float x){return (x>0);}
|
||||
static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
|
||||
static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);}
|
||||
static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
|
||||
static inline float ramp_gradient(float x){return (x>0)+.1;}
|
||||
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
|
||||
static inline float tanh_gradient(float x){return 1-x*x;}
|
||||
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;}
|
||||
|
||||
static inline float relu_gradient(float x) { return (x > 0); }
|
||||
|
||||
static inline float elu_gradient(float x) { return (x >= 0) + (x < 0) * (x + 1); }
|
||||
|
||||
static inline float selu_gradient(float x) { return (x >= 0) * 1.0507 + (x < 0) * (x + 1.0507 * 1.6732); }
|
||||
|
||||
static inline float relie_gradient(float x) { return (x > 0) ? 1 : .01; }
|
||||
|
||||
static inline float ramp_gradient(float x) { return (x > 0) + .1; }
|
||||
|
||||
static inline float leaky_gradient(float x) { return (x > 0) ? 1 : .1; }
|
||||
|
||||
static inline float tanh_gradient(float x) { return 1 - x * x; }
|
||||
|
||||
static inline float plse_gradient(float x) { return (x < 0 || x > 1) ? .01 : .125; }
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
#include "avgpool_layer.h"
|
||||
#include "cuda.h"
|
||||
#include <stdio.h>
|
||||
|
||||
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c)
|
||||
{
|
||||
fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c);
|
||||
avgpool_layer l = {0};
|
||||
l.type = AVGPOOL;
|
||||
l.batch = batch;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.out_w = 1;
|
||||
l.out_h = 1;
|
||||
l.out_c = c;
|
||||
l.outputs = l.out_c;
|
||||
l.inputs = h*w*c;
|
||||
int output_size = l.outputs * batch;
|
||||
l.output = calloc(output_size, sizeof(float));
|
||||
l.delta = calloc(output_size, sizeof(float));
|
||||
l.forward = forward_avgpool_layer;
|
||||
l.backward = backward_avgpool_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_avgpool_layer_gpu;
|
||||
l.backward_gpu = backward_avgpool_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, output_size);
|
||||
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void resize_avgpool_layer(avgpool_layer *l, int w, int h)
|
||||
{
|
||||
l->w = w;
|
||||
l->h = h;
|
||||
l->inputs = h*w*l->c;
|
||||
}
|
||||
|
||||
void forward_avgpool_layer(const avgpool_layer l, network net)
|
||||
{
|
||||
int b,i,k;
|
||||
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
for(k = 0; k < l.c; ++k){
|
||||
int out_index = k + b*l.c;
|
||||
l.output[out_index] = 0;
|
||||
for(i = 0; i < l.h*l.w; ++i){
|
||||
int in_index = i + l.h*l.w*(k + b*l.c);
|
||||
l.output[out_index] += net.input[in_index];
|
||||
}
|
||||
l.output[out_index] /= l.h*l.w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void backward_avgpool_layer(const avgpool_layer l, network net)
|
||||
{
|
||||
int b,i,k;
|
||||
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
for(k = 0; k < l.c; ++k){
|
||||
int out_index = k + b*l.c;
|
||||
for(i = 0; i < l.h*l.w; ++i){
|
||||
int in_index = i + l.h*l.w*(k + b*l.c);
|
||||
net.delta[in_index] += l.delta[out_index] / (l.h*l.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
#include "avgpool_layer.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) {
|
||||
fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c);
|
||||
avgpool_layer l = {(LAYER_TYPE)0};
|
||||
l.type = AVGPOOL;
|
||||
l.batch = batch;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.out_w = 1;
|
||||
l.out_h = 1;
|
||||
l.out_c = c;
|
||||
l.outputs = l.out_c;
|
||||
l.inputs = h * w * c;
|
||||
int output_size = l.outputs * batch;
|
||||
l.output = (float*) calloc(output_size, sizeof(float));
|
||||
l.delta = (float*) calloc(output_size, sizeof(float));
|
||||
l.forward = forward_avgpool_layer;
|
||||
l.backward = backward_avgpool_layer;
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_avgpool_layer_gpu;
|
||||
l.backward_gpu = backward_avgpool_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, output_size);
|
||||
l.delta_gpu = cuda_make_array(l.delta, output_size);
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void resize_avgpool_layer(avgpool_layer *l, int w, int h) {
|
||||
l->w = w;
|
||||
l->h = h;
|
||||
l->inputs = h * w * l->c;
|
||||
}
|
||||
|
||||
void forward_avgpool_layer(const avgpool_layer l, network net) {
|
||||
int b, i, k;
|
||||
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
for (k = 0; k < l.c; ++k) {
|
||||
int out_index = k + b * l.c;
|
||||
l.output[out_index] = 0;
|
||||
for (i = 0; i < l.h * l.w; ++i) {
|
||||
int in_index = i + l.h * l.w * (k + b * l.c);
|
||||
l.output[out_index] += net.input[in_index];
|
||||
}
|
||||
l.output[out_index] /= l.h * l.w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void backward_avgpool_layer(const avgpool_layer l, network net) {
|
||||
int b, i, k;
|
||||
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
for (k = 0; k < l.c; ++k) {
|
||||
int out_index = k + b * l.c;
|
||||
for (i = 0; i < l.h * l.w; ++i) {
|
||||
int in_index = i + l.h * l.w * (k + b * l.c);
|
||||
net.delta[in_index] += l.delta[out_index] / (l.h * l.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2,7 +2,6 @@
|
|||
#define AVGPOOL_LAYER_H
|
||||
|
||||
#include "image.h"
|
||||
#include "cuda.h"
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
|
@ -15,6 +14,7 @@ void forward_avgpool_layer(const avgpool_layer l, network net);
|
|||
void backward_avgpool_layer(const avgpool_layer l, network net);
|
||||
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
void forward_avgpool_layer_gpu(avgpool_layer l, network net);
|
||||
void backward_avgpool_layer_gpu(avgpool_layer l, network net);
|
||||
#endif
|
||||
|
|
|
@ -1,61 +1,59 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
extern "C" {
|
||||
#include "avgpool_layer.h"
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id >= n) return;
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id >= n) return;
|
||||
|
||||
int k = id % c;
|
||||
id /= c;
|
||||
int b = id;
|
||||
|
||||
int i;
|
||||
int out_index = (k + c*b);
|
||||
int out_index = (k + c * b);
|
||||
output[out_index] = 0;
|
||||
for(i = 0; i < w*h; ++i){
|
||||
int in_index = i + h*w*(k + b*c);
|
||||
for (i = 0; i < w * h; ++i) {
|
||||
int in_index = i + h * w * (k + b * c);
|
||||
output[out_index] += input[in_index];
|
||||
}
|
||||
output[out_index] /= w*h;
|
||||
output[out_index] /= w * h;
|
||||
}
|
||||
|
||||
__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id >= n) return;
|
||||
__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id >= n) return;
|
||||
|
||||
int k = id % c;
|
||||
id /= c;
|
||||
int b = id;
|
||||
|
||||
int i;
|
||||
int out_index = (k + c*b);
|
||||
for(i = 0; i < w*h; ++i){
|
||||
int in_index = i + h*w*(k + b*c);
|
||||
in_delta[in_index] += out_delta[out_index] / (w*h);
|
||||
int out_index = (k + c * b);
|
||||
for (i = 0; i < w * h; ++i) {
|
||||
int in_index = i + h * w * (k + b * c);
|
||||
in_delta[in_index] += out_delta[out_index] / (w * h);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network net)
|
||||
{
|
||||
size_t n = layer.c*layer.batch;
|
||||
void forward_avgpool_layer_gpu(avgpool_layer layer, network net) {
|
||||
size_t n = layer.c * layer.batch;
|
||||
|
||||
forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
forward_avgpool_layer_kernel<<<cuda_gridsize(
|
||||
n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.input_gpu, layer.output_gpu);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network net)
|
||||
{
|
||||
size_t n = layer.c*layer.batch;
|
||||
void backward_avgpool_layer_gpu(avgpool_layer layer, network net) {
|
||||
size_t n = layer.c * layer.batch;
|
||||
|
||||
backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
backward_avgpool_layer_kernel<<<cuda_gridsize(
|
||||
n), BLOCK>>>(n, layer.w, layer.h, layer.c, net.delta_gpu, layer.delta_gpu);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
|
|
|
@ -3,34 +3,33 @@
|
|||
#include "blas.h"
|
||||
#include <stdio.h>
|
||||
|
||||
layer make_batchnorm_layer(int batch, int w, int h, int c)
|
||||
{
|
||||
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
|
||||
layer l = {0};
|
||||
layer make_batchnorm_layer(int batch, int w, int h, int c) {
|
||||
fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w, h, c);
|
||||
layer l = {(LAYER_TYPE)0};
|
||||
l.type = BATCHNORM;
|
||||
l.batch = batch;
|
||||
l.h = l.out_h = h;
|
||||
l.w = l.out_w = w;
|
||||
l.c = l.out_c = c;
|
||||
l.output = calloc(h * w * c * batch, sizeof(float));
|
||||
l.delta = calloc(h * w * c * batch, sizeof(float));
|
||||
l.inputs = w*h*c;
|
||||
l.output = (float *) calloc(h * w * c * batch, sizeof(float));
|
||||
l.delta = (float *) calloc(h * w * c * batch, sizeof(float));
|
||||
l.inputs = w * h * c;
|
||||
l.outputs = l.inputs;
|
||||
|
||||
l.scales = calloc(c, sizeof(float));
|
||||
l.scale_updates = calloc(c, sizeof(float));
|
||||
l.biases = calloc(c, sizeof(float));
|
||||
l.bias_updates = calloc(c, sizeof(float));
|
||||
l.scales = (float *) calloc(c, sizeof(float));
|
||||
l.scale_updates = (float *) calloc(c, sizeof(float));
|
||||
l.biases = (float *) calloc(c, sizeof(float));
|
||||
l.bias_updates = (float *) calloc(c, sizeof(float));
|
||||
int i;
|
||||
for(i = 0; i < c; ++i){
|
||||
for (i = 0; i < c; ++i) {
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.mean = calloc(c, sizeof(float));
|
||||
l.variance = calloc(c, sizeof(float));
|
||||
l.mean = (float *) calloc(c, sizeof(float));
|
||||
l.variance = (float *) calloc(c, sizeof(float));
|
||||
|
||||
l.rolling_mean = calloc(c, sizeof(float));
|
||||
l.rolling_variance = calloc(c, sizeof(float));
|
||||
l.rolling_mean = (float *) calloc(c, sizeof(float));
|
||||
l.rolling_variance = (float *) calloc(c, sizeof(float));
|
||||
|
||||
l.forward = forward_batchnorm_layer;
|
||||
l.backward = backward_batchnorm_layer;
|
||||
|
@ -58,25 +57,24 @@ layer make_batchnorm_layer(int batch, int w, int h, int c)
|
|||
|
||||
l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs);
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
#ifdef CUDNN
|
||||
hipdnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
hipdnnSetTensor4dDescriptor(l.dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.normTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
|
||||
{
|
||||
int i,b,f;
|
||||
for(f = 0; f < n; ++f){
|
||||
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) {
|
||||
int i, b, f;
|
||||
for (f = 0; f < n; ++f) {
|
||||
float sum = 0;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(i = 0; i < size; ++i){
|
||||
int index = i + size*(f + n*b);
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (i = 0; i < size; ++i) {
|
||||
int index = i + size * (f + n * b);
|
||||
sum += delta[index] * x_norm[index];
|
||||
}
|
||||
}
|
||||
|
@ -84,91 +82,92 @@ void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size,
|
|||
}
|
||||
}
|
||||
|
||||
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
|
||||
{
|
||||
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) {
|
||||
|
||||
int i,j,k;
|
||||
for(i = 0; i < filters; ++i){
|
||||
int i, j, k;
|
||||
for (i = 0; i < filters; ++i) {
|
||||
mean_delta[i] = 0;
|
||||
for (j = 0; j < batch; ++j) {
|
||||
for (k = 0; k < spatial; ++k) {
|
||||
int index = j*filters*spatial + i*spatial + k;
|
||||
int index = j * filters * spatial + i * spatial + k;
|
||||
mean_delta[i] += delta[index];
|
||||
}
|
||||
}
|
||||
mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
|
||||
mean_delta[i] *= (-1. / sqrt(variance[i] + .00001f));
|
||||
}
|
||||
}
|
||||
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
|
||||
{
|
||||
|
||||
int i,j,k;
|
||||
for(i = 0; i < filters; ++i){
|
||||
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial,
|
||||
float *variance_delta) {
|
||||
|
||||
int i, j, k;
|
||||
for (i = 0; i < filters; ++i) {
|
||||
variance_delta[i] = 0;
|
||||
for(j = 0; j < batch; ++j){
|
||||
for(k = 0; k < spatial; ++k){
|
||||
int index = j*filters*spatial + i*spatial + k;
|
||||
variance_delta[i] += delta[index]*(x[index] - mean[i]);
|
||||
for (j = 0; j < batch; ++j) {
|
||||
for (k = 0; k < spatial; ++k) {
|
||||
int index = j * filters * spatial + i * spatial + k;
|
||||
variance_delta[i] += delta[index] * (x[index] - mean[i]);
|
||||
}
|
||||
}
|
||||
variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
|
||||
variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float) (-3. / 2.));
|
||||
}
|
||||
}
|
||||
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
|
||||
{
|
||||
|
||||
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch,
|
||||
int filters, int spatial, float *delta) {
|
||||
int f, j, k;
|
||||
for(j = 0; j < batch; ++j){
|
||||
for(f = 0; f < filters; ++f){
|
||||
for(k = 0; k < spatial; ++k){
|
||||
int index = j*filters*spatial + f*spatial + k;
|
||||
delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
|
||||
for (j = 0; j < batch; ++j) {
|
||||
for (f = 0; f < filters; ++f) {
|
||||
for (k = 0; k < spatial; ++k) {
|
||||
int index = j * filters * spatial + f * spatial + k;
|
||||
delta[index] = delta[index] * 1. / (sqrt(variance[f] + .00001f)) +
|
||||
variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) +
|
||||
mean_delta[f] / (spatial * batch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void resize_batchnorm_layer(layer *layer, int w, int h)
|
||||
{
|
||||
void resize_batchnorm_layer(layer *layer, int w, int h) {
|
||||
fprintf(stderr, "Not implemented\n");
|
||||
}
|
||||
|
||||
void forward_batchnorm_layer(layer l, network net)
|
||||
{
|
||||
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
|
||||
copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
|
||||
if(net.train){
|
||||
mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
|
||||
variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
|
||||
void forward_batchnorm_layer(layer l, network net) {
|
||||
if (l.type == BATCHNORM) copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1);
|
||||
copy_cpu(l.outputs * l.batch, l.output, 1, l.x, 1);
|
||||
if (net.train) {
|
||||
mean_cpu(l.output, l.batch, l.out_c, l.out_h * l.out_w, l.mean);
|
||||
variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h * l.out_w, l.variance);
|
||||
|
||||
scal_cpu(l.out_c, .99, l.rolling_mean, 1);
|
||||
axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
|
||||
scal_cpu(l.out_c, .99, l.rolling_variance, 1);
|
||||
axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);
|
||||
|
||||
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
|
||||
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h * l.out_w);
|
||||
copy_cpu(l.outputs * l.batch, l.output, 1, l.x_norm, 1);
|
||||
} else {
|
||||
normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h * l.out_w);
|
||||
}
|
||||
scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h * l.out_w);
|
||||
add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h * l.out_w);
|
||||
}
|
||||
|
||||
void backward_batchnorm_layer(layer l, network net)
|
||||
{
|
||||
if(!net.train){
|
||||
void backward_batchnorm_layer(layer l, network net) {
|
||||
if (!net.train) {
|
||||
l.mean = l.rolling_mean;
|
||||
l.variance = l.rolling_variance;
|
||||
}
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h);
|
||||
backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates);
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w * l.out_h);
|
||||
backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w * l.out_h, l.scale_updates);
|
||||
|
||||
scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
|
||||
scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h * l.out_w);
|
||||
|
||||
mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta);
|
||||
variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta);
|
||||
normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
|
||||
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
|
||||
mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w * l.out_h, l.mean_delta);
|
||||
variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w * l.out_h, l.variance_delta);
|
||||
normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w * l.out_h,
|
||||
l.delta);
|
||||
if (l.type == BATCHNORM) copy_cpu(l.outputs * l.batch, l.delta, 1, net.delta, 1);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
@ -194,8 +193,8 @@ void forward_batchnorm_layer_gpu(layer l, network net)
|
|||
#ifdef CUDNN
|
||||
float one = 1;
|
||||
float zero = 0;
|
||||
cudnnBatchNormalizationForwardTraining(cudnn_handle(),
|
||||
CUDNN_BATCHNORM_SPATIAL,
|
||||
hipdnnBatchNormalizationForwardTraining(cudnn_handle(),
|
||||
HIPDNN_BATCHNORM_SPATIAL,
|
||||
&one,
|
||||
&zero,
|
||||
l.dstTensorDesc,
|
||||
|
@ -244,8 +243,8 @@ void backward_batchnorm_layer_gpu(layer l, network net)
|
|||
#ifdef CUDNN
|
||||
float one = 1;
|
||||
float zero = 0;
|
||||
cudnnBatchNormalizationBackward(cudnn_handle(),
|
||||
CUDNN_BATCHNORM_SPATIAL,
|
||||
hipdnnBatchNormalizationBackward(cudnn_handle(),
|
||||
HIPDNN_BATCHNORM_SPATIAL,
|
||||
&one,
|
||||
&zero,
|
||||
&one,
|
351
src/blas.c
351
src/blas.c
|
@ -1,351 +0,0 @@
|
|||
#include "blas.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
|
||||
{
|
||||
int b,i,j,k;
|
||||
int out_c = c/(stride*stride);
|
||||
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(k = 0; k < c; ++k){
|
||||
for(j = 0; j < h; ++j){
|
||||
for(i = 0; i < w; ++i){
|
||||
int in_index = i + w*(j + h*(k + c*b));
|
||||
int c2 = k % out_c;
|
||||
int offset = k / out_c;
|
||||
int w2 = i*stride + offset % stride;
|
||||
int h2 = j*stride + offset / stride;
|
||||
int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
|
||||
if(forward) out[out_index] = x[in_index];
|
||||
else out[in_index] = x[out_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void flatten(float *x, int size, int layers, int batch, int forward)
|
||||
{
|
||||
float *swap = calloc(size*layers*batch, sizeof(float));
|
||||
int i,c,b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(c = 0; c < layers; ++c){
|
||||
for(i = 0; i < size; ++i){
|
||||
int i1 = b*layers*size + c*size + i;
|
||||
int i2 = b*layers*size + i*layers + c;
|
||||
if (forward) swap[i2] = x[i1];
|
||||
else swap[i1] = x[i2];
|
||||
}
|
||||
}
|
||||
}
|
||||
memcpy(x, swap, size*layers*batch*sizeof(float));
|
||||
free(swap);
|
||||
}
|
||||
|
||||
void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0);
|
||||
}
|
||||
}
|
||||
|
||||
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
if(da) da[i] += dc[i] * s[i];
|
||||
if(db) db[i] += dc[i] * (1-s[i]);
|
||||
ds[i] += dc[i] * (a[i] - b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
|
||||
{
|
||||
int stride = w1/w2;
|
||||
int sample = w2/w1;
|
||||
assert(stride == h1/h2);
|
||||
assert(sample == h2/h1);
|
||||
if(stride < 1) stride = 1;
|
||||
if(sample < 1) sample = 1;
|
||||
int minw = (w1 < w2) ? w1 : w2;
|
||||
int minh = (h1 < h2) ? h1 : h2;
|
||||
int minc = (c1 < c2) ? c1 : c2;
|
||||
|
||||
int i,j,k,b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(k = 0; k < minc; ++k){
|
||||
for(j = 0; j < minh; ++j){
|
||||
for(i = 0; i < minw; ++i){
|
||||
int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
|
||||
int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
|
||||
out[out_index] = s1*out[out_index] + s2*add[add_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
|
||||
{
|
||||
float scale = 1./(batch * spatial);
|
||||
int i,j,k;
|
||||
for(i = 0; i < filters; ++i){
|
||||
mean[i] = 0;
|
||||
for(j = 0; j < batch; ++j){
|
||||
for(k = 0; k < spatial; ++k){
|
||||
int index = j*filters*spatial + i*spatial + k;
|
||||
mean[i] += x[index];
|
||||
}
|
||||
}
|
||||
mean[i] *= scale;
|
||||
}
|
||||
}
|
||||
|
||||
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
|
||||
{
|
||||
float scale = 1./(batch * spatial - 1);
|
||||
int i,j,k;
|
||||
for(i = 0; i < filters; ++i){
|
||||
variance[i] = 0;
|
||||
for(j = 0; j < batch; ++j){
|
||||
for(k = 0; k < spatial; ++k){
|
||||
int index = j*filters*spatial + i*spatial + k;
|
||||
variance[i] += pow((x[index] - mean[i]), 2);
|
||||
}
|
||||
}
|
||||
variance[i] *= scale;
|
||||
}
|
||||
}
|
||||
|
||||
void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial)
|
||||
{
|
||||
int b,f,i;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(i = 0; i < spatial; ++i){
|
||||
float sum = 0;
|
||||
for(f = 0; f < filters; ++f){
|
||||
int index = b*filters*spatial + f*spatial + i;
|
||||
sum += powf(x[index], 2);
|
||||
}
|
||||
sum = sqrtf(sum);
|
||||
for(f = 0; f < filters; ++f){
|
||||
int index = b*filters*spatial + f*spatial + i;
|
||||
x[index] /= sum;
|
||||
dx[index] = (1 - x[index]) / sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
|
||||
{
|
||||
int b, f, i;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(f = 0; f < filters; ++f){
|
||||
for(i = 0; i < spatial; ++i){
|
||||
int index = b*filters*spatial + f*spatial + i;
|
||||
x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void const_cpu(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
|
||||
}
|
||||
|
||||
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX];
|
||||
}
|
||||
|
||||
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA);
|
||||
}
|
||||
|
||||
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
|
||||
}
|
||||
|
||||
void scal_cpu(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
|
||||
}
|
||||
|
||||
void fill_cpu(int N, float ALPHA, float *X, int INCX)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
|
||||
}
|
||||
|
||||
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
|
||||
{
|
||||
int i, j;
|
||||
int index = 0;
|
||||
for(j = 0; j < B; ++j) {
|
||||
for(i = 0; i < NX; ++i){
|
||||
if(X) X[j*NX + i] += OUT[index];
|
||||
++index;
|
||||
}
|
||||
for(i = 0; i < NY; ++i){
|
||||
if(Y) Y[j*NY + i] += OUT[index];
|
||||
++index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
|
||||
{
|
||||
int i, j;
|
||||
int index = 0;
|
||||
for(j = 0; j < B; ++j) {
|
||||
for(i = 0; i < NX; ++i){
|
||||
OUT[index++] = X[j*NX + i];
|
||||
}
|
||||
for(i = 0; i < NY; ++i){
|
||||
OUT[index++] = Y[j*NY + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
|
||||
}
|
||||
|
||||
void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i];
|
||||
}
|
||||
|
||||
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
float diff = truth[i] - pred[i];
|
||||
float abs_val = fabs(diff);
|
||||
if(abs_val < 1) {
|
||||
error[i] = diff * diff;
|
||||
delta[i] = diff;
|
||||
}
|
||||
else {
|
||||
error[i] = 2*abs_val - 1;
|
||||
delta[i] = (diff < 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
float diff = truth[i] - pred[i];
|
||||
error[i] = fabs(diff);
|
||||
delta[i] = diff > 0 ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
float t = truth[i];
|
||||
float p = pred[i];
|
||||
error[i] = (t) ? -log(p) : 0;
|
||||
delta[i] = t-p;
|
||||
}
|
||||
}
|
||||
|
||||
void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
float t = truth[i];
|
||||
float p = pred[i];
|
||||
error[i] = -t*log(p) - (1-t)*log(1-p);
|
||||
delta[i] = t-p;
|
||||
}
|
||||
}
|
||||
|
||||
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
float diff = truth[i] - pred[i];
|
||||
error[i] = diff * diff;
|
||||
delta[i] = diff;
|
||||
}
|
||||
}
|
||||
|
||||
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
|
||||
{
|
||||
int i;
|
||||
float dot = 0;
|
||||
for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY];
|
||||
return dot;
|
||||
}
|
||||
|
||||
void softmax(float *input, int n, float temp, int stride, float *output)
|
||||
{
|
||||
int i;
|
||||
float sum = 0;
|
||||
float largest = -FLT_MAX;
|
||||
for(i = 0; i < n; ++i){
|
||||
if(input[i*stride] > largest) largest = input[i*stride];
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
float e = exp(input[i*stride]/temp - largest/temp);
|
||||
sum += e;
|
||||
output[i*stride] = e;
|
||||
}
|
||||
for(i = 0; i < n; ++i){
|
||||
output[i*stride] /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
|
||||
{
|
||||
int g, b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(g = 0; g < groups; ++g){
|
||||
softmax(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
|
||||
{
|
||||
int i, j, k, b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(k = 0; k < c; ++k){
|
||||
for(j = 0; j < h*stride; ++j){
|
||||
for(i = 0; i < w*stride; ++i){
|
||||
int in_index = b*w*h*c + k*w*h + (j/stride)*w + i/stride;
|
||||
int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i;
|
||||
if(forward) out[out_index] = scale*in[in_index];
|
||||
else in[in_index] += scale*out[out_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,326 @@
|
|||
#include "blas.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) {
|
||||
int b, i, j, k;
|
||||
int out_c = c / (stride * stride);
|
||||
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (k = 0; k < c; ++k) {
|
||||
for (j = 0; j < h; ++j) {
|
||||
for (i = 0; i < w; ++i) {
|
||||
int in_index = i + w * (j + h * (k + c * b));
|
||||
int c2 = k % out_c;
|
||||
int offset = k / out_c;
|
||||
int w2 = i * stride + offset % stride;
|
||||
int h2 = j * stride + offset / stride;
|
||||
int out_index = w2 + w * stride * (h2 + h * stride * (c2 + out_c * b));
|
||||
if (forward) out[out_index] = x[in_index];
|
||||
else out[in_index] = x[out_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void flatten(float *x, int size, int layers, int batch, int forward) {
|
||||
float *swap = (float *) calloc(size * layers * batch, sizeof(float));
|
||||
int i, c, b;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (c = 0; c < layers; ++c) {
|
||||
for (i = 0; i < size; ++i) {
|
||||
int i1 = b * layers * size + c * size + i;
|
||||
int i2 = b * layers * size + i * layers + c;
|
||||
if (forward) swap[i2] = x[i1];
|
||||
else swap[i1] = x[i2];
|
||||
}
|
||||
}
|
||||
}
|
||||
memcpy(x, swap, size * layers * batch * sizeof(float));
|
||||
free(swap);
|
||||
}
|
||||
|
||||
void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
c[i] = s[i] * a[i] + (1 - s[i]) * (b ? b[i] : 0);
|
||||
}
|
||||
}
|
||||
|
||||
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (da) da[i] += dc[i] * s[i];
|
||||
if (db) db[i] += dc[i] * (1 - s[i]);
|
||||
ds[i] += dc[i] * (a[i] - b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) {
|
||||
int stride = w1 / w2;
|
||||
int sample = w2 / w1;
|
||||
assert(stride == h1 / h2);
|
||||
assert(sample == h2 / h1);
|
||||
if (stride < 1) stride = 1;
|
||||
if (sample < 1) sample = 1;
|
||||
int minw = (w1 < w2) ? w1 : w2;
|
||||
int minh = (h1 < h2) ? h1 : h2;
|
||||
int minc = (c1 < c2) ? c1 : c2;
|
||||
|
||||
int i, j, k, b;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (k = 0; k < minc; ++k) {
|
||||
for (j = 0; j < minh; ++j) {
|
||||
for (i = 0; i < minw; ++i) {
|
||||
int out_index = i * sample + w2 * (j * sample + h2 * (k + c2 * b));
|
||||
int add_index = i * stride + w1 * (j * stride + h1 * (k + c1 * b));
|
||||
out[out_index] = s1 * out[out_index] + s2 * add[add_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) {
|
||||
float scale = 1. / (batch * spatial);
|
||||
int i, j, k;
|
||||
for (i = 0; i < filters; ++i) {
|
||||
mean[i] = 0;
|
||||
for (j = 0; j < batch; ++j) {
|
||||
for (k = 0; k < spatial; ++k) {
|
||||
int index = j * filters * spatial + i * spatial + k;
|
||||
mean[i] += x[index];
|
||||
}
|
||||
}
|
||||
mean[i] *= scale;
|
||||
}
|
||||
}
|
||||
|
||||
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) {
|
||||
float scale = 1. / (batch * spatial - 1);
|
||||
int i, j, k;
|
||||
for (i = 0; i < filters; ++i) {
|
||||
variance[i] = 0;
|
||||
for (j = 0; j < batch; ++j) {
|
||||
for (k = 0; k < spatial; ++k) {
|
||||
int index = j * filters * spatial + i * spatial + k;
|
||||
variance[i] += pow((x[index] - mean[i]), 2);
|
||||
}
|
||||
}
|
||||
variance[i] *= scale;
|
||||
}
|
||||
}
|
||||
|
||||
void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial) {
|
||||
int b, f, i;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (i = 0; i < spatial; ++i) {
|
||||
float sum = 0;
|
||||
for (f = 0; f < filters; ++f) {
|
||||
int index = b * filters * spatial + f * spatial + i;
|
||||
sum += powf(x[index], 2);
|
||||
}
|
||||
sum = sqrtf(sum);
|
||||
for (f = 0; f < filters; ++f) {
|
||||
int index = b * filters * spatial + f * spatial + i;
|
||||
x[index] /= sum;
|
||||
dx[index] = (1 - x[index]) / sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) {
|
||||
int b, f, i;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (f = 0; f < filters; ++f) {
|
||||
for (i = 0; i < spatial; ++i) {
|
||||
int index = b * filters * spatial + f * spatial + i;
|
||||
x[index] = (x[index] - mean[f]) / (sqrt(variance[f]) + .000001f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void const_cpu(int N, float ALPHA, float *X, int INCX) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) X[i * INCX] = ALPHA;
|
||||
}
|
||||
|
||||
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) Y[i * INCY] *= X[i * INCX];
|
||||
}
|
||||
|
||||
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) Y[i * INCY] = pow(X[i * INCX], ALPHA);
|
||||
}
|
||||
|
||||
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) Y[i * INCY] += ALPHA * X[i * INCX];
|
||||
}
|
||||
|
||||
void scal_cpu(int N, float ALPHA, float *X, int INCX) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) X[i * INCX] *= ALPHA;
|
||||
}
|
||||
|
||||
void fill_cpu(int N, float ALPHA, float *X, int INCX) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) X[i * INCX] = ALPHA;
|
||||
}
|
||||
|
||||
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) {
|
||||
int i, j;
|
||||
int index = 0;
|
||||
for (j = 0; j < B; ++j) {
|
||||
for (i = 0; i < NX; ++i) {
|
||||
if (X) X[j * NX + i] += OUT[index];
|
||||
++index;
|
||||
}
|
||||
for (i = 0; i < NY; ++i) {
|
||||
if (Y) Y[j * NY + i] += OUT[index];
|
||||
++index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) {
|
||||
int i, j;
|
||||
int index = 0;
|
||||
for (j = 0; j < B; ++j) {
|
||||
for (i = 0; i < NX; ++i) {
|
||||
OUT[index++] = X[j * NX + i];
|
||||
}
|
||||
for (i = 0; i < NY; ++i) {
|
||||
OUT[index++] = Y[j * NY + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) Y[i * INCY] = X[i * INCX];
|
||||
}
|
||||
|
||||
void mult_add_into_cpu(int N, float *X, float *Y, float *Z) {
|
||||
int i;
|
||||
for (i = 0; i < N; ++i) Z[i] += X[i] * Y[i];
|
||||
}
|
||||
|
||||
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
float diff = truth[i] - pred[i];
|
||||
float abs_val = fabs(diff);
|
||||
if (abs_val < 1) {
|
||||
error[i] = diff * diff;
|
||||
delta[i] = diff;
|
||||
} else {
|
||||
error[i] = 2 * abs_val - 1;
|
||||
delta[i] = (diff < 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
float diff = truth[i] - pred[i];
|
||||
error[i] = fabs(diff);
|
||||
delta[i] = diff > 0 ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
float t = truth[i];
|
||||
float p = pred[i];
|
||||
error[i] = (t) ? -log(p) : 0;
|
||||
delta[i] = t - p;
|
||||
}
|
||||
}
|
||||
|
||||
void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
float t = truth[i];
|
||||
float p = pred[i];
|
||||
error[i] = -t * log(p) - (1 - t) * log(1 - p);
|
||||
delta[i] = t - p;
|
||||
}
|
||||
}
|
||||
|
||||
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
float diff = truth[i] - pred[i];
|
||||
error[i] = diff * diff;
|
||||
delta[i] = diff;
|
||||
}
|
||||
}
|
||||
|
||||
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) {
|
||||
int i;
|
||||
float dot = 0;
|
||||
for (i = 0; i < N; ++i) dot += X[i * INCX] * Y[i * INCY];
|
||||
return dot;
|
||||
}
|
||||
|
||||
void softmax(float *input, int n, float temp, int stride, float *output) {
|
||||
int i;
|
||||
float sum = 0;
|
||||
float largest = -FLT_MAX;
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (input[i * stride] > largest) largest = input[i * stride];
|
||||
}
|
||||
for (i = 0; i < n; ++i) {
|
||||
float e = exp(input[i * stride] / temp - largest / temp);
|
||||
sum += e;
|
||||
output[i * stride] = e;
|
||||
}
|
||||
for (i = 0; i < n; ++i) {
|
||||
output[i * stride] /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp,
|
||||
float *output) {
|
||||
int g, b;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (g = 0; g < groups; ++g) {
|
||||
softmax(input + b * batch_offset + g * group_offset, n, temp, stride,
|
||||
output + b * batch_offset + g * group_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) {
|
||||
int i, j, k, b;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (k = 0; k < c; ++k) {
|
||||
for (j = 0; j < h * stride; ++j) {
|
||||
for (i = 0; i < w * stride; ++i) {
|
||||
int in_index = b * w * h * c + k * w * h + (j / stride) * w + i / stride;
|
||||
int out_index = b * w * h * c * stride * stride + k * w * h * stride * stride + j * w * stride + i;
|
||||
if (forward) out[out_index] = scale * in[in_index];
|
||||
else in[in_index] += scale * out[out_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
41
src/blas.h
41
src/blas.h
|
@ -1,51 +1,80 @@
|
|||
#ifndef BLAS_H
|
||||
#define BLAS_H
|
||||
|
||||
#include "darknet.h"
|
||||
|
||||
void flatten(float *x, int size, int layers, int batch, int forward);
|
||||
|
||||
void pm(int M, int N, float *A);
|
||||
|
||||
float *random_matrix(int rows, int cols);
|
||||
|
||||
void time_random_matrix(int TA, int TB, int m, int k, int n);
|
||||
|
||||
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
|
||||
|
||||
void test_blas();
|
||||
|
||||
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
|
||||
|
||||
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
|
||||
|
||||
void mult_add_into_cpu(int N, float *X, float *Y, float *Z);
|
||||
|
||||
void const_cpu(int N, float ALPHA, float *X, int INCX);
|
||||
void constrain_gpu(int N, float ALPHA, float * X, int INCX);
|
||||
|
||||
void constrain_gpu(int N, float ALPHA, float *X, int INCX);
|
||||
|
||||
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
|
||||
|
||||
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY);
|
||||
|
||||
int test_gpu_blas();
|
||||
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out);
|
||||
|
||||
void
|
||||
shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out);
|
||||
|
||||
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
|
||||
|
||||
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
|
||||
|
||||
void scale_bias(float *output, float *scales, int batch, int n, int size);
|
||||
|
||||
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
|
||||
|
||||
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
|
||||
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
|
||||
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
|
||||
|
||||
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial,
|
||||
float *variance_delta);
|
||||
|
||||
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch,
|
||||
int filters, int spatial, float *delta);
|
||||
|
||||
void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial);
|
||||
|
||||
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
|
||||
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
|
||||
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
|
||||
void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
|
||||
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
|
||||
|
||||
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
|
||||
|
||||
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc);
|
||||
|
||||
void softmax(float *input, int n, float temp, int stride, float *output);
|
||||
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
|
||||
|
||||
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp,
|
||||
float *output);
|
||||
|
||||
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
|
||||
|
||||
#ifdef GPU
|
||||
#include "cuda.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "tree.h"
|
||||
|
||||
void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
357
src/box.c
357
src/box.c
|
@ -1,357 +0,0 @@
|
|||
#include "box.h"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int nms_comparator(const void *pa, const void *pb)
|
||||
{
|
||||
detection a = *(detection *)pa;
|
||||
detection b = *(detection *)pb;
|
||||
float diff = 0;
|
||||
if(b.sort_class >= 0){
|
||||
diff = a.prob[b.sort_class] - b.prob[b.sort_class];
|
||||
} else {
|
||||
diff = a.objectness - b.objectness;
|
||||
}
|
||||
if(diff < 0) return 1;
|
||||
else if(diff > 0) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void do_nms_obj(detection *dets, int total, int classes, float thresh)
|
||||
{
|
||||
int i, j, k;
|
||||
k = total-1;
|
||||
for(i = 0; i <= k; ++i){
|
||||
if(dets[i].objectness == 0){
|
||||
detection swap = dets[i];
|
||||
dets[i] = dets[k];
|
||||
dets[k] = swap;
|
||||
--k;
|
||||
--i;
|
||||
}
|
||||
}
|
||||
total = k+1;
|
||||
|
||||
for(i = 0; i < total; ++i){
|
||||
dets[i].sort_class = -1;
|
||||
}
|
||||
|
||||
qsort(dets, total, sizeof(detection), nms_comparator);
|
||||
for(i = 0; i < total; ++i){
|
||||
if(dets[i].objectness == 0) continue;
|
||||
box a = dets[i].bbox;
|
||||
for(j = i+1; j < total; ++j){
|
||||
if(dets[j].objectness == 0) continue;
|
||||
box b = dets[j].bbox;
|
||||
if (box_iou(a, b) > thresh){
|
||||
dets[j].objectness = 0;
|
||||
for(k = 0; k < classes; ++k){
|
||||
dets[j].prob[k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void do_nms_sort(detection *dets, int total, int classes, float thresh)
|
||||
{
|
||||
int i, j, k;
|
||||
k = total-1;
|
||||
for(i = 0; i <= k; ++i){
|
||||
if(dets[i].objectness == 0){
|
||||
detection swap = dets[i];
|
||||
dets[i] = dets[k];
|
||||
dets[k] = swap;
|
||||
--k;
|
||||
--i;
|
||||
}
|
||||
}
|
||||
total = k+1;
|
||||
|
||||
for(k = 0; k < classes; ++k){
|
||||
for(i = 0; i < total; ++i){
|
||||
dets[i].sort_class = k;
|
||||
}
|
||||
qsort(dets, total, sizeof(detection), nms_comparator);
|
||||
for(i = 0; i < total; ++i){
|
||||
if(dets[i].prob[k] == 0) continue;
|
||||
box a = dets[i].bbox;
|
||||
for(j = i+1; j < total; ++j){
|
||||
box b = dets[j].bbox;
|
||||
if (box_iou(a, b) > thresh){
|
||||
dets[j].prob[k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
box float_to_box(float *f, int stride)
|
||||
{
|
||||
box b = {0};
|
||||
b.x = f[0];
|
||||
b.y = f[1*stride];
|
||||
b.w = f[2*stride];
|
||||
b.h = f[3*stride];
|
||||
return b;
|
||||
}
|
||||
|
||||
dbox derivative(box a, box b)
|
||||
{
|
||||
dbox d;
|
||||
d.dx = 0;
|
||||
d.dw = 0;
|
||||
float l1 = a.x - a.w/2;
|
||||
float l2 = b.x - b.w/2;
|
||||
if (l1 > l2){
|
||||
d.dx -= 1;
|
||||
d.dw += .5;
|
||||
}
|
||||
float r1 = a.x + a.w/2;
|
||||
float r2 = b.x + b.w/2;
|
||||
if(r1 < r2){
|
||||
d.dx += 1;
|
||||
d.dw += .5;
|
||||
}
|
||||
if (l1 > r2) {
|
||||
d.dx = -1;
|
||||
d.dw = 0;
|
||||
}
|
||||
if (r1 < l2){
|
||||
d.dx = 1;
|
||||
d.dw = 0;
|
||||
}
|
||||
|
||||
d.dy = 0;
|
||||
d.dh = 0;
|
||||
float t1 = a.y - a.h/2;
|
||||
float t2 = b.y - b.h/2;
|
||||
if (t1 > t2){
|
||||
d.dy -= 1;
|
||||
d.dh += .5;
|
||||
}
|
||||
float b1 = a.y + a.h/2;
|
||||
float b2 = b.y + b.h/2;
|
||||
if(b1 < b2){
|
||||
d.dy += 1;
|
||||
d.dh += .5;
|
||||
}
|
||||
if (t1 > b2) {
|
||||
d.dy = -1;
|
||||
d.dh = 0;
|
||||
}
|
||||
if (b1 < t2){
|
||||
d.dy = 1;
|
||||
d.dh = 0;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
float overlap(float x1, float w1, float x2, float w2)
|
||||
{
|
||||
float l1 = x1 - w1/2;
|
||||
float l2 = x2 - w2/2;
|
||||
float left = l1 > l2 ? l1 : l2;
|
||||
float r1 = x1 + w1/2;
|
||||
float r2 = x2 + w2/2;
|
||||
float right = r1 < r2 ? r1 : r2;
|
||||
return right - left;
|
||||
}
|
||||
|
||||
float box_intersection(box a, box b)
|
||||
{
|
||||
float w = overlap(a.x, a.w, b.x, b.w);
|
||||
float h = overlap(a.y, a.h, b.y, b.h);
|
||||
if(w < 0 || h < 0) return 0;
|
||||
float area = w*h;
|
||||
return area;
|
||||
}
|
||||
|
||||
float box_union(box a, box b)
|
||||
{
|
||||
float i = box_intersection(a, b);
|
||||
float u = a.w*a.h + b.w*b.h - i;
|
||||
return u;
|
||||
}
|
||||
|
||||
float box_iou(box a, box b)
|
||||
{
|
||||
return box_intersection(a, b)/box_union(a, b);
|
||||
}
|
||||
|
||||
float box_rmse(box a, box b)
|
||||
{
|
||||
return sqrt(pow(a.x-b.x, 2) +
|
||||
pow(a.y-b.y, 2) +
|
||||
pow(a.w-b.w, 2) +
|
||||
pow(a.h-b.h, 2));
|
||||
}
|
||||
|
||||
dbox dintersect(box a, box b)
|
||||
{
|
||||
float w = overlap(a.x, a.w, b.x, b.w);
|
||||
float h = overlap(a.y, a.h, b.y, b.h);
|
||||
dbox dover = derivative(a, b);
|
||||
dbox di;
|
||||
|
||||
di.dw = dover.dw*h;
|
||||
di.dx = dover.dx*h;
|
||||
di.dh = dover.dh*w;
|
||||
di.dy = dover.dy*w;
|
||||
|
||||
return di;
|
||||
}
|
||||
|
||||
dbox dunion(box a, box b)
|
||||
{
|
||||
dbox du;
|
||||
|
||||
dbox di = dintersect(a, b);
|
||||
du.dw = a.h - di.dw;
|
||||
du.dh = a.w - di.dh;
|
||||
du.dx = -di.dx;
|
||||
du.dy = -di.dy;
|
||||
|
||||
return du;
|
||||
}
|
||||
|
||||
|
||||
void test_dunion()
|
||||
{
|
||||
box a = {0, 0, 1, 1};
|
||||
box dxa= {0+.0001, 0, 1, 1};
|
||||
box dya= {0, 0+.0001, 1, 1};
|
||||
box dwa= {0, 0, 1+.0001, 1};
|
||||
box dha= {0, 0, 1, 1+.0001};
|
||||
|
||||
box b = {.5, .5, .2, .2};
|
||||
dbox di = dunion(a,b);
|
||||
printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh);
|
||||
float inter = box_union(a, b);
|
||||
float xinter = box_union(dxa, b);
|
||||
float yinter = box_union(dya, b);
|
||||
float winter = box_union(dwa, b);
|
||||
float hinter = box_union(dha, b);
|
||||
xinter = (xinter - inter)/(.0001);
|
||||
yinter = (yinter - inter)/(.0001);
|
||||
winter = (winter - inter)/(.0001);
|
||||
hinter = (hinter - inter)/(.0001);
|
||||
printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter);
|
||||
}
|
||||
void test_dintersect()
|
||||
{
|
||||
box a = {0, 0, 1, 1};
|
||||
box dxa= {0+.0001, 0, 1, 1};
|
||||
box dya= {0, 0+.0001, 1, 1};
|
||||
box dwa= {0, 0, 1+.0001, 1};
|
||||
box dha= {0, 0, 1, 1+.0001};
|
||||
|
||||
box b = {.5, .5, .2, .2};
|
||||
dbox di = dintersect(a,b);
|
||||
printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh);
|
||||
float inter = box_intersection(a, b);
|
||||
float xinter = box_intersection(dxa, b);
|
||||
float yinter = box_intersection(dya, b);
|
||||
float winter = box_intersection(dwa, b);
|
||||
float hinter = box_intersection(dha, b);
|
||||
xinter = (xinter - inter)/(.0001);
|
||||
yinter = (yinter - inter)/(.0001);
|
||||
winter = (winter - inter)/(.0001);
|
||||
hinter = (hinter - inter)/(.0001);
|
||||
printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter);
|
||||
}
|
||||
|
||||
void test_box()
|
||||
{
|
||||
test_dintersect();
|
||||
test_dunion();
|
||||
box a = {0, 0, 1, 1};
|
||||
box dxa= {0+.00001, 0, 1, 1};
|
||||
box dya= {0, 0+.00001, 1, 1};
|
||||
box dwa= {0, 0, 1+.00001, 1};
|
||||
box dha= {0, 0, 1, 1+.00001};
|
||||
|
||||
box b = {.5, 0, .2, .2};
|
||||
|
||||
float iou = box_iou(a,b);
|
||||
iou = (1-iou)*(1-iou);
|
||||
printf("%f\n", iou);
|
||||
dbox d = diou(a, b);
|
||||
printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh);
|
||||
|
||||
float xiou = box_iou(dxa, b);
|
||||
float yiou = box_iou(dya, b);
|
||||
float wiou = box_iou(dwa, b);
|
||||
float hiou = box_iou(dha, b);
|
||||
xiou = ((1-xiou)*(1-xiou) - iou)/(.00001);
|
||||
yiou = ((1-yiou)*(1-yiou) - iou)/(.00001);
|
||||
wiou = ((1-wiou)*(1-wiou) - iou)/(.00001);
|
||||
hiou = ((1-hiou)*(1-hiou) - iou)/(.00001);
|
||||
printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou);
|
||||
}
|
||||
|
||||
dbox diou(box a, box b)
|
||||
{
|
||||
float u = box_union(a,b);
|
||||
float i = box_intersection(a,b);
|
||||
dbox di = dintersect(a,b);
|
||||
dbox du = dunion(a,b);
|
||||
dbox dd = {0,0,0,0};
|
||||
|
||||
if(i <= 0 || 1) {
|
||||
dd.dx = b.x - a.x;
|
||||
dd.dy = b.y - a.y;
|
||||
dd.dw = b.w - a.w;
|
||||
dd.dh = b.h - a.h;
|
||||
return dd;
|
||||
}
|
||||
|
||||
dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u);
|
||||
dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u);
|
||||
dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u);
|
||||
dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u);
|
||||
return dd;
|
||||
}
|
||||
|
||||
|
||||
void do_nms(box *boxes, float **probs, int total, int classes, float thresh)
|
||||
{
|
||||
int i, j, k;
|
||||
for(i = 0; i < total; ++i){
|
||||
int any = 0;
|
||||
for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0);
|
||||
if(!any) {
|
||||
continue;
|
||||
}
|
||||
for(j = i+1; j < total; ++j){
|
||||
if (box_iou(boxes[i], boxes[j]) > thresh){
|
||||
for(k = 0; k < classes; ++k){
|
||||
if (probs[i][k] < probs[j][k]) probs[i][k] = 0;
|
||||
else probs[j][k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
box encode_box(box b, box anchor)
|
||||
{
|
||||
box encode;
|
||||
encode.x = (b.x - anchor.x) / anchor.w;
|
||||
encode.y = (b.y - anchor.y) / anchor.h;
|
||||
encode.w = log2(b.w / anchor.w);
|
||||
encode.h = log2(b.h / anchor.h);
|
||||
return encode;
|
||||
}
|
||||
|
||||
box decode_box(box b, box anchor)
|
||||
{
|
||||
box decode;
|
||||
decode.x = b.x * anchor.w + anchor.x;
|
||||
decode.y = b.y * anchor.h + anchor.y;
|
||||
decode.w = pow(2., b.w) * anchor.w;
|
||||
decode.h = pow(2., b.h) * anchor.h;
|
||||
return decode;
|
||||
}
|
|
@ -0,0 +1,339 @@
|
|||
#include "box.h"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int nms_comparator(const void *pa, const void *pb) {
|
||||
detection a = *(detection *) pa;
|
||||
detection b = *(detection *) pb;
|
||||
float diff = 0;
|
||||
if (b.sort_class >= 0) {
|
||||
diff = a.prob[b.sort_class] - b.prob[b.sort_class];
|
||||
} else {
|
||||
diff = a.objectness - b.objectness;
|
||||
}
|
||||
if (diff < 0) return 1;
|
||||
else if (diff > 0) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Objectness-based non-max suppression.
 * Compacts away zero-objectness detections, sorts the rest by objectness
 * (highest first), then zeroes the objectness and all class probs of any
 * box whose IoU with a higher-scoring kept box exceeds thresh.
 * Mutates dets in place. */
void do_nms_obj(detection *dets, int total, int classes, float thresh) {
    int i, j, k;

    /* Swap zero-objectness detections to the tail and shrink the range. */
    k = total - 1;
    for (i = 0; i <= k; ++i) {
        if (dets[i].objectness != 0) continue;
        detection tmp = dets[i];
        dets[i] = dets[k];
        dets[k] = tmp;
        --k;
        --i;   /* re-examine the element swapped into slot i */
    }
    total = k + 1;

    /* sort_class < 0 makes the comparator order by objectness. */
    for (i = 0; i < total; ++i) {
        dets[i].sort_class = -1;
    }
    qsort(dets, total, sizeof(detection), nms_comparator);

    /* Suppress lower-scoring boxes that overlap a kept box too much. */
    for (i = 0; i < total; ++i) {
        if (dets[i].objectness == 0) continue;
        box a = dets[i].bbox;
        for (j = i + 1; j < total; ++j) {
            if (dets[j].objectness == 0) continue;
            if (box_iou(a, dets[j].bbox) > thresh) {
                dets[j].objectness = 0;
                for (k = 0; k < classes; ++k) {
                    dets[j].prob[k] = 0;
                }
            }
        }
    }
}
|
||||
|
||||
|
||||
/* Per-class non-max suppression.
 * Compacts away zero-objectness detections, then for each class sorts the
 * survivors by that class's probability (highest first) and zeroes the
 * probability of any box whose IoU with a higher-scoring box exceeds
 * thresh. Mutates dets in place. */
void do_nms_sort(detection *dets, int total, int classes, float thresh) {
    int i, j, k;

    /* Swap zero-objectness detections to the tail and shrink the range. */
    k = total - 1;
    for (i = 0; i <= k; ++i) {
        if (dets[i].objectness != 0) continue;
        detection tmp = dets[i];
        dets[i] = dets[k];
        dets[k] = tmp;
        --k;
        --i;   /* re-examine the element swapped into slot i */
    }
    total = k + 1;

    for (k = 0; k < classes; ++k) {
        /* Direct the comparator at class k, then sort. */
        for (i = 0; i < total; ++i) {
            dets[i].sort_class = k;
        }
        qsort(dets, total, sizeof(detection), nms_comparator);

        for (i = 0; i < total; ++i) {
            if (dets[i].prob[k] == 0) continue;
            box a = dets[i].bbox;
            for (j = i + 1; j < total; ++j) {
                if (box_iou(a, dets[j].bbox) > thresh) {
                    dets[j].prob[k] = 0;
                }
            }
        }
    }
}
|
||||
|
||||
box float_to_box(float *f, int stride) {
|
||||
box b = {0};
|
||||
b.x = f[0];
|
||||
b.y = f[1 * stride];
|
||||
b.w = f[2 * stride];
|
||||
b.h = f[3 * stride];
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Gradient of the 1-D overlap extents of box a with respect to a's
 * center/size, per axis. Boxes are (center x/y, width w, height h).
 * Overlapping edges each contribute +-1 to the center derivative and
 * +.5 to the size derivative; fully disjoint boxes get a fixed
 * (+-1, 0) push toward overlap. */
dbox derivative(box a, box b) {
    dbox d;

    /* --- x / w component --- */
    d.dx = 0;
    d.dw = 0;
    float a_left = a.x - a.w / 2;
    float b_left = b.x - b.w / 2;
    if (a_left > b_left) {
        d.dx -= 1;
        d.dw += .5;
    }
    float a_right = a.x + a.w / 2;
    float b_right = b.x + b.w / 2;
    if (a_right < b_right) {
        d.dx += 1;
        d.dw += .5;
    }
    if (a_left > b_right) {    /* disjoint: a entirely to the right of b */
        d.dx = -1;
        d.dw = 0;
    }
    if (a_right < b_left) {    /* disjoint: a entirely to the left of b */
        d.dx = 1;
        d.dw = 0;
    }

    /* --- y / h component (same logic, vertical axis) --- */
    d.dy = 0;
    d.dh = 0;
    float a_top = a.y - a.h / 2;
    float b_top = b.y - b.h / 2;
    if (a_top > b_top) {
        d.dy -= 1;
        d.dh += .5;
    }
    float a_bot = a.y + a.h / 2;
    float b_bot = b.y + b.h / 2;
    if (a_bot < b_bot) {
        d.dy += 1;
        d.dh += .5;
    }
    if (a_top > b_bot) {       /* disjoint vertically, a below b */
        d.dy = -1;
        d.dh = 0;
    }
    if (a_bot < b_top) {       /* disjoint vertically, a above b */
        d.dy = 1;
        d.dh = 0;
    }
    return d;
}
|
||||
|
||||
/* Length of the intersection of two 1-D intervals given by center and
 * width. Negative when the intervals are disjoint (gap size, negated). */
float overlap(float x1, float w1, float x2, float w2) {
    float a_lo = x1 - w1 / 2;
    float a_hi = x1 + w1 / 2;
    float b_lo = x2 - w2 / 2;
    float b_hi = x2 + w2 / 2;
    float lo = (a_lo > b_lo) ? a_lo : b_lo;   /* max of left edges  */
    float hi = (a_hi < b_hi) ? a_hi : b_hi;   /* min of right edges */
    return hi - lo;
}
|
||||
|
||||
/* Intersection area of two boxes; 0 when they do not overlap on
 * either axis. */
float box_intersection(box a, box b) {
    float iw = overlap(a.x, a.w, b.x, b.w);
    float ih = overlap(a.y, a.h, b.y, b.h);
    if (iw < 0 || ih < 0) return 0;
    return iw * ih;
}
|
||||
|
||||
/* Union area of two boxes: sum of areas minus the intersection. */
float box_union(box a, box b) {
    float inter = box_intersection(a, b);
    return a.w * a.h + b.w * b.h - inter;
}
|
||||
|
||||
/* Intersection-over-union of two boxes. */
float box_iou(box a, box b) {
    float inter = box_intersection(a, b);
    float uni = box_union(a, b);
    return inter / uni;
}
|
||||
|
||||
/* Euclidean distance between two boxes in (x, y, w, h) space. */
float box_rmse(box a, box b) {
    double sq = pow(a.x - b.x, 2)
              + pow(a.y - b.y, 2)
              + pow(a.w - b.w, 2)
              + pow(a.h - b.h, 2);
    return sqrt(sq);
}
|
||||
|
||||
/* Gradient of the intersection area w.r.t. box a, by the product rule:
 * x/w gradients scale by the overlap height, y/h by the overlap width. */
dbox dintersect(box a, box b) {
    float iw = overlap(a.x, a.w, b.x, b.w);
    float ih = overlap(a.y, a.h, b.y, b.h);
    dbox edge = derivative(a, b);

    dbox grad;
    grad.dx = edge.dx * ih;
    grad.dw = edge.dw * ih;
    grad.dy = edge.dy * iw;
    grad.dh = edge.dh * iw;
    return grad;
}
|
||||
|
||||
/* Gradient of the union area w.r.t. box a:
 * d(union) = d(area of a) - d(intersection). Area of a is w*h, so its
 * gradient is (0, 0, h, w); centers contribute only via intersection. */
dbox dunion(box a, box b) {
    dbox di = dintersect(a, b);
    dbox grad;
    grad.dx = -di.dx;
    grad.dy = -di.dy;
    grad.dw = a.h - di.dw;
    grad.dh = a.w - di.dh;
    return grad;
}
|
||||
|
||||
|
||||
void test_dunion() {
|
||||
box a = {0, 0, 1, 1};
|
||||
box dxa = {0 + .0001, 0, 1, 1};
|
||||
box dya = {0, 0 + .0001, 1, 1};
|
||||
box dwa = {0, 0, 1 + .0001, 1};
|
||||
box dha = {0, 0, 1, 1 + .0001};
|
||||
|
||||
box b = {.5, .5, .2, .2};
|
||||
dbox di = dunion(a, b);
|
||||
printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh);
|
||||
float inter = box_union(a, b);
|
||||
float xinter = box_union(dxa, b);
|
||||
float yinter = box_union(dya, b);
|
||||
float winter = box_union(dwa, b);
|
||||
float hinter = box_union(dha, b);
|
||||
xinter = (xinter - inter) / (.0001);
|
||||
yinter = (yinter - inter) / (.0001);
|
||||
winter = (winter - inter) / (.0001);
|
||||
hinter = (hinter - inter) / (.0001);
|
||||
printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter);
|
||||
}
|
||||
|
||||
void test_dintersect() {
|
||||
box a = {0, 0, 1, 1};
|
||||
box dxa = {0 + .0001, 0, 1, 1};
|
||||
box dya = {0, 0 + .0001, 1, 1};
|
||||
box dwa = {0, 0, 1 + .0001, 1};
|
||||
box dha = {0, 0, 1, 1 + .0001};
|
||||
|
||||
box b = {.5, .5, .2, .2};
|
||||
dbox di = dintersect(a, b);
|
||||
printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh);
|
||||
float inter = box_intersection(a, b);
|
||||
float xinter = box_intersection(dxa, b);
|
||||
float yinter = box_intersection(dya, b);
|
||||
float winter = box_intersection(dwa, b);
|
||||
float hinter = box_intersection(dha, b);
|
||||
xinter = (xinter - inter) / (.0001);
|
||||
yinter = (yinter - inter) / (.0001);
|
||||
winter = (winter - inter) / (.0001);
|
||||
hinter = (hinter - inter) / (.0001);
|
||||
printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter);
|
||||
}
|
||||
|
||||
void test_box() {
|
||||
test_dintersect();
|
||||
test_dunion();
|
||||
box a = {0, 0, 1, 1};
|
||||
box dxa = {0 + .00001, 0, 1, 1};
|
||||
box dya = {0, 0 + .00001, 1, 1};
|
||||
box dwa = {0, 0, 1 + .00001, 1};
|
||||
box dha = {0, 0, 1, 1 + .00001};
|
||||
|
||||
box b = {.5, 0, .2, .2};
|
||||
|
||||
float iou = box_iou(a, b);
|
||||
iou = (1 - iou) * (1 - iou);
|
||||
printf("%f\n", iou);
|
||||
dbox d = diou(a, b);
|
||||
printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh);
|
||||
|
||||
float xiou = box_iou(dxa, b);
|
||||
float yiou = box_iou(dya, b);
|
||||
float wiou = box_iou(dwa, b);
|
||||
float hiou = box_iou(dha, b);
|
||||
xiou = ((1 - xiou) * (1 - xiou) - iou) / (.00001);
|
||||
yiou = ((1 - yiou) * (1 - yiou) - iou) / (.00001);
|
||||
wiou = ((1 - wiou) * (1 - wiou) - iou) / (.00001);
|
||||
hiou = ((1 - hiou) * (1 - hiou) - iou) / (.00001);
|
||||
printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou);
|
||||
}
|
||||
|
||||
/* Gradient of the IoU-based loss w.r.t. box a.
 * NOTE(review): the `|| 1` in the condition below makes the first branch
 * unconditional, so the analytic quotient-rule gradient after it is dead
 * code and the function always returns the simple (b - a) difference.
 * This looks deliberate in upstream darknet and is preserved as-is. */
dbox diou(box a, box b) {
    float u = box_union(a, b);
    float i = box_intersection(a, b);
    dbox di = dintersect(a, b);
    dbox du = dunion(a, b);
    dbox dd = {0, 0, 0, 0};

    if (i <= 0 || 1) {
        /* Fallback gradient: push a straight toward b. */
        dd.dx = b.x - a.x;
        dd.dy = b.y - a.y;
        dd.dw = b.w - a.w;
        dd.dh = b.h - a.h;
        return dd;
    }

    /* Unreachable: quotient-rule gradient of (1 - i/u)^2. */
    dd.dx = 2 * pow((1 - (i / u)), 1) * (di.dx * u - du.dx * i) / (u * u);
    dd.dy = 2 * pow((1 - (i / u)), 1) * (di.dy * u - du.dy * i) / (u * u);
    dd.dw = 2 * pow((1 - (i / u)), 1) * (di.dw * u - du.dw * i) / (u * u);
    dd.dh = 2 * pow((1 - (i / u)), 1) * (di.dh * u - du.dh * i) / (u * u);
    return dd;
}
|
||||
|
||||
|
||||
/* Legacy NMS over parallel arrays (boxes[i] with per-class probs[i]).
 * For every overlapping pair above thresh, zeroes the lower of the two
 * class probabilities, per class. Mutates probs in place. */
void do_nms(box *boxes, float **probs, int total, int classes, float thresh) {
    int i, j, k;
    for (i = 0; i < total; ++i) {
        /* Skip boxes with no positive class probability at all. */
        int has_prob = 0;
        for (k = 0; k < classes; ++k) {
            if (probs[i][k] > 0) {
                has_prob = 1;
                break;
            }
        }
        if (!has_prob) continue;

        for (j = i + 1; j < total; ++j) {
            if (box_iou(boxes[i], boxes[j]) <= thresh) continue;
            /* Keep only the higher probability per class. */
            for (k = 0; k < classes; ++k) {
                if (probs[i][k] < probs[j][k]) probs[i][k] = 0;
                else probs[j][k] = 0;
            }
        }
    }
}
|
||||
|
||||
/* Encodes box b relative to an anchor: center offsets normalized by
 * anchor size, width/height as log2 ratios. Inverse of decode_box. */
box encode_box(box b, box anchor) {
    box e;
    e.x = (b.x - anchor.x) / anchor.w;
    e.y = (b.y - anchor.y) / anchor.h;
    e.w = log2(b.w / anchor.w);
    e.h = log2(b.h / anchor.h);
    return e;
}
|
||||
|
||||
/* Decodes an anchor-relative box back to absolute coordinates.
 * Inverse of encode_box: scales offsets by anchor size and raises 2 to
 * the encoded log-ratios. */
box decode_box(box b, box anchor) {
    box d;
    d.x = b.x * anchor.w + anchor.x;
    d.y = b.y * anchor.h + anchor.y;
    d.w = pow(2., b.w) * anchor.w;
    d.h = pow(2., b.h) * anchor.h;
    return d;
}
|
|
@ -1,14 +1,18 @@
|
|||
#ifndef BOX_H
|
||||
#define BOX_H
|
||||
|
||||
#include "darknet.h"
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
float dx, dy, dw, dh;
|
||||
} dbox;
|
||||
|
||||
float box_rmse(box a, box b);
|
||||
|
||||
dbox diou(box a, box b);
|
||||
|
||||
box decode_box(box b, box anchor);
|
||||
|
||||
box encode_box(box b, box anchor);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,23 +1,24 @@
|
|||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
void col2im_add_pixel(float *im, int height, int width, int channels,
|
||||
int row, int col, int channel, int pad, float val)
|
||||
{
|
||||
int row, int col, int channel, int pad, float val) {
|
||||
row -= pad;
|
||||
col -= pad;
|
||||
|
||||
if (row < 0 || col < 0 ||
|
||||
row >= height || col >= width) return;
|
||||
im[col + width*(row + height*channel)] += val;
|
||||
row >= height || col >= width)
|
||||
return;
|
||||
im[col + width * (row + height * channel)] += val;
|
||||
}
|
||||
|
||||
//This one might be too, can't remember.
|
||||
void col2im_cpu(float* data_col,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float* data_im)
|
||||
{
|
||||
int c,h,w;
|
||||
int height_col = (height + 2*pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2*pad - ksize) / stride + 1;
|
||||
void col2im_cpu(float *data_col,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float *data_im) {
|
||||
int c, h, w;
|
||||
int height_col = (height + 2 * pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2 * pad - ksize) / stride + 1;
|
||||
|
||||
int channels_col = channels * ksize * ksize;
|
||||
for (c = 0; c < channels_col; ++c) {
|
||||
|
@ -31,7 +32,7 @@ void col2im_cpu(float* data_col,
|
|||
int col_index = (c * height_col + h) * width_col + w;
|
||||
double val = data_col[col_index];
|
||||
col2im_add_pixel(data_im, height, width, channels,
|
||||
im_row, im_col, c_im, pad, val);
|
||||
im_row, im_col, c_im, pad, val);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,9 +1,9 @@
|
|||
#ifndef COL2IM_H
|
||||
#define COL2IM_H
|
||||
|
||||
void col2im_cpu(float* data_col,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float* data_im);
|
||||
void col2im_cpu(float *data_col,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float *data_im);
|
||||
|
||||
#ifdef GPU
|
||||
void col2im_gpu(float *data_col,
|
||||
|
|
|
@ -1,23 +1,25 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "col2im.h"
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
#define BLOCK 512
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
|
||||
// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
|
||||
|
||||
__global__ void col2im_gpu_kernel(const int n, const float* data_col,
|
||||
const int height, const int width, const int ksize,
|
||||
const int pad,
|
||||
const int stride,
|
||||
const int height_col, const int width_col,
|
||||
float *data_im) {
|
||||
int index = blockIdx.x*blockDim.x+threadIdx.x;
|
||||
for(; index < n; index += blockDim.x*gridDim.x){
|
||||
__global__ void col2im_gpu_kernel(const int n, const float *data_col,
|
||||
const int height, const int width, const int ksize,
|
||||
const int pad,
|
||||
const int stride,
|
||||
const int height_col, const int width_col,
|
||||
float *data_im) {
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
for (; index < n; index += blockDim.x * gridDim.x) {
|
||||
float val = 0;
|
||||
int w = index % width + pad;
|
||||
int h = (index / width) % height + pad;
|
||||
|
@ -29,7 +31,7 @@ __global__ void col2im_gpu_kernel(const int n, const float* data_col,
|
|||
int h_col_end = min(h / stride + 1, height_col);
|
||||
// equivalent implementation
|
||||
int offset =
|
||||
(c * ksize * ksize + h * ksize + w) * height_col * width_col;
|
||||
(c * ksize * ksize + h * ksize + w) * height_col * width_col;
|
||||
int coeff_h_col = (1 - stride * ksize * height_col) * width_col;
|
||||
int coeff_w_col = (1 - stride * height_col * width_col);
|
||||
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
|
||||
|
@ -42,17 +44,17 @@ __global__ void col2im_gpu_kernel(const int n, const float* data_col,
|
|||
}
|
||||
|
||||
void col2im_gpu(float *data_col,
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float *data_im){
|
||||
int channels, int height, int width,
|
||||
int ksize, int stride, int pad, float *data_im) {
|
||||
// We are going to launch channels * height_col * width_col kernels, each
|
||||
// kernel responsible for copying a single-channel grid.
|
||||
int height_col = (height + 2 * pad - ksize) / stride + 1;
|
||||
int width_col = (width + 2 * pad - ksize) / stride + 1;
|
||||
int num_kernels = channels * height * width;
|
||||
col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK,
|
||||
BLOCK>>>(
|
||||
num_kernels, data_col, height, width, ksize, pad,
|
||||
stride, height_col,
|
||||
width_col, data_im);
|
||||
col2im_gpu_kernel<<<(num_kernels + BLOCK - 1) / BLOCK,
|
||||
BLOCK>>>(
|
||||
num_kernels, data_col, height, width, ksize, pad,
|
||||
stride, height_col,
|
||||
width_col, data_im);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,21 +7,20 @@
|
|||
#include "parser.h"
|
||||
#include "box.h"
|
||||
|
||||
void train_compare(char *cfgfile, char *weightfile)
|
||||
{
|
||||
void train_compare(char *cfgfile, char *weightfile) {
|
||||
srand(time(0));
|
||||
float avg_loss = -1;
|
||||
char *base = basecfg(cfgfile);
|
||||
char *backup_directory = "/home/pjreddie/backup/";
|
||||
printf("%s\n", base);
|
||||
network net = parse_network_cfg(cfgfile);
|
||||
if(weightfile){
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
|
||||
int imgs = 1024;
|
||||
list *plist = get_paths("data/compare.train.list");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
int N = plist->size;
|
||||
printf("%d\n", N);
|
||||
clock_t time;
|
||||
|
@ -40,64 +39,64 @@ void train_compare(char *cfgfile, char *weightfile)
|
|||
args.type = COMPARE_DATA;
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
int epoch = *net.seen/N;
|
||||
int epoch = *net.seen / N;
|
||||
int i = 0;
|
||||
while(1){
|
||||
while (1) {
|
||||
++i;
|
||||
time=clock();
|
||||
time = clock();
|
||||
pthread_join(load_thread, 0);
|
||||
train = buffer;
|
||||
|
||||
load_thread = load_data_in_thread(args);
|
||||
printf("Loaded: %lf seconds\n", sec(clock()-time));
|
||||
time=clock();
|
||||
printf("Loaded: %lf seconds\n", sec(clock() - time));
|
||||
time = clock();
|
||||
float loss = train_network(net, train);
|
||||
if(avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss*.9 + loss*.1;
|
||||
printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen);
|
||||
if (avg_loss == -1) avg_loss = loss;
|
||||
avg_loss = avg_loss * .9 + loss * .1;
|
||||
printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float) *net.seen / N, loss, avg_loss,
|
||||
sec(clock() - time), *net.seen);
|
||||
free_data(train);
|
||||
if(i%100 == 0){
|
||||
if (i % 100 == 0) {
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i);
|
||||
sprintf(buff, "%s/%s_%d_minor_%d.weights", backup_directory, base, epoch, i);
|
||||
save_weights(net, buff);
|
||||
}
|
||||
if(*net.seen/N > epoch){
|
||||
epoch = *net.seen/N;
|
||||
if (*net.seen / N > epoch) {
|
||||
epoch = *net.seen / N;
|
||||
i = 0;
|
||||
char buff[256];
|
||||
sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
|
||||
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, epoch);
|
||||
save_weights(net, buff);
|
||||
if(epoch%22 == 0) net.learning_rate *= .1;
|
||||
if (epoch % 22 == 0) net.learning_rate *= .1;
|
||||
}
|
||||
}
|
||||
pthread_join(load_thread, 0);
|
||||
free_data(buffer);
|
||||
free_network(net);
|
||||
free_ptrs((void**)paths, plist->size);
|
||||
free_ptrs((void **) paths, plist->size);
|
||||
free_list(plist);
|
||||
free(base);
|
||||
}
|
||||
|
||||
void validate_compare(char *filename, char *weightfile)
|
||||
{
|
||||
void validate_compare(char *filename, char *weightfile) {
|
||||
int i = 0;
|
||||
network net = parse_network_cfg(filename);
|
||||
if(weightfile){
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
srand(time(0));
|
||||
|
||||
list *plist = get_paths("data/compare.val.list");
|
||||
//list *plist = get_paths("data/compare.val.old");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
int N = plist->size/2;
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
int N = plist->size / 2;
|
||||
free_list(plist);
|
||||
|
||||
clock_t time;
|
||||
int correct = 0;
|
||||
int total = 0;
|
||||
int splits = 10;
|
||||
int num = (i+1)*N/splits - i*N/splits;
|
||||
int num = (i + 1) * N / splits - i * N / splits;
|
||||
|
||||
data val, buffer;
|
||||
|
||||
|
@ -112,35 +111,36 @@ void validate_compare(char *filename, char *weightfile)
|
|||
args.type = COMPARE_DATA;
|
||||
|
||||
pthread_t load_thread = load_data_in_thread(args);
|
||||
for(i = 1; i <= splits; ++i){
|
||||
time=clock();
|
||||
for (i = 1; i <= splits; ++i) {
|
||||
time = clock();
|
||||
|
||||
pthread_join(load_thread, 0);
|
||||
val = buffer;
|
||||
|
||||
num = (i+1)*N/splits - i*N/splits;
|
||||
char **part = paths+(i*N/splits);
|
||||
if(i != splits){
|
||||
num = (i + 1) * N / splits - i * N / splits;
|
||||
char **part = paths + (i * N / splits);
|
||||
if (i != splits) {
|
||||
args.paths = part;
|
||||
load_thread = load_data_in_thread(args);
|
||||
}
|
||||
printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
|
||||
printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock() - time));
|
||||
|
||||
time=clock();
|
||||
time = clock();
|
||||
matrix pred = network_predict_data(net, val);
|
||||
int j,k;
|
||||
for(j = 0; j < val.y.rows; ++j){
|
||||
for(k = 0; k < 20; ++k){
|
||||
if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){
|
||||
int j, k;
|
||||
for (j = 0; j < val.y.rows; ++j) {
|
||||
for (k = 0; k < 20; ++k) {
|
||||
if (val.y.vals[j][k * 2] != val.y.vals[j][k * 2 + 1]) {
|
||||
++total;
|
||||
if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){
|
||||
if ((val.y.vals[j][k * 2] < val.y.vals[j][k * 2 + 1]) ==
|
||||
(pred.vals[j][k * 2] < pred.vals[j][k * 2 + 1])) {
|
||||
++correct;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free_matrix(pred);
|
||||
printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows);
|
||||
printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float) correct / total, sec(clock() - time), val.X.rows);
|
||||
free_data(val);
|
||||
}
|
||||
}
|
||||
|
@ -148,7 +148,7 @@ void validate_compare(char *filename, char *weightfile)
|
|||
typedef struct {
|
||||
network net;
|
||||
char *filename;
|
||||
int class;
|
||||
int nclass;
|
||||
int classes;
|
||||
float elo;
|
||||
float *elos;
|
||||
|
@ -157,78 +157,73 @@ typedef struct {
|
|||
int total_compares = 0;
|
||||
int current_class = 0;
|
||||
|
||||
int elo_comparator(const void*a, const void *b)
|
||||
{
|
||||
sortable_bbox box1 = *(sortable_bbox*)a;
|
||||
sortable_bbox box2 = *(sortable_bbox*)b;
|
||||
if(box1.elos[current_class] == box2.elos[current_class]) return 0;
|
||||
if(box1.elos[current_class] > box2.elos[current_class]) return -1;
|
||||
int elo_comparator(const void *a, const void *b) {
|
||||
sortable_bbox box1 = *(sortable_bbox *) a;
|
||||
sortable_bbox box2 = *(sortable_bbox *) b;
|
||||
if (box1.elos[current_class] == box2.elos[current_class]) return 0;
|
||||
if (box1.elos[current_class] > box2.elos[current_class]) return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bbox_comparator(const void *a, const void *b)
|
||||
{
|
||||
int bbox_comparator(const void *a, const void *b) {
|
||||
++total_compares;
|
||||
sortable_bbox box1 = *(sortable_bbox*)a;
|
||||
sortable_bbox box2 = *(sortable_bbox*)b;
|
||||
sortable_bbox box1 = *(sortable_bbox *) a;
|
||||
sortable_bbox box2 = *(sortable_bbox *) b;
|
||||
network net = box1.net;
|
||||
int class = box1.class;
|
||||
int nclass = box1.nclass;
|
||||
|
||||
image im1 = load_image_color(box1.filename, net.w, net.h);
|
||||
image im2 = load_image_color(box2.filename, net.w, net.h);
|
||||
float *X = calloc(net.w*net.h*net.c, sizeof(float));
|
||||
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
|
||||
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
|
||||
float *X = (float *) calloc(net.w * net.h * net.c, sizeof(float));
|
||||
memcpy(X, im1.data, im1.w * im1.h * im1.c * sizeof(float));
|
||||
memcpy(X + im1.w * im1.h * im1.c, im2.data, im2.w * im2.h * im2.c * sizeof(float));
|
||||
float *predictions = network_predict(net, X);
|
||||
|
||||
|
||||
free_image(im1);
|
||||
free_image(im2);
|
||||
free(X);
|
||||
if (predictions[class*2] > predictions[class*2+1]){
|
||||
if (predictions[nclass * 2] > predictions[nclass * 2 + 1]) {
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void bbox_update(sortable_bbox *a, sortable_bbox *b, int class, int result)
|
||||
{
|
||||
void bbox_update(sortable_bbox *a, sortable_bbox *b, int nclass, int result) {
|
||||
int k = 32;
|
||||
float EA = 1./(1+pow(10, (b->elos[class] - a->elos[class])/400.));
|
||||
float EB = 1./(1+pow(10, (a->elos[class] - b->elos[class])/400.));
|
||||
float EA = 1. / (1 + pow(10, (b->elos[nclass] - a->elos[nclass]) / 400.));
|
||||
float EB = 1. / (1 + pow(10, (a->elos[nclass] - b->elos[nclass]) / 400.));
|
||||
float SA = result ? 1 : 0;
|
||||
float SB = result ? 0 : 1;
|
||||
a->elos[class] += k*(SA - EA);
|
||||
b->elos[class] += k*(SB - EB);
|
||||
a->elos[nclass] += k * (SA - EA);
|
||||
b->elos[nclass] += k * (SB - EB);
|
||||
}
|
||||
|
||||
void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class)
|
||||
{
|
||||
void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int nclass) {
|
||||
image im1 = load_image_color(a->filename, net.w, net.h);
|
||||
image im2 = load_image_color(b->filename, net.w, net.h);
|
||||
float *X = calloc(net.w*net.h*net.c, sizeof(float));
|
||||
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
|
||||
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
|
||||
float *X = (float *) calloc(net.w * net.h * net.c, sizeof(float));
|
||||
memcpy(X, im1.data, im1.w * im1.h * im1.c * sizeof(float));
|
||||
memcpy(X + im1.w * im1.h * im1.c, im2.data, im2.w * im2.h * im2.c * sizeof(float));
|
||||
float *predictions = network_predict(net, X);
|
||||
++total_compares;
|
||||
|
||||
int i;
|
||||
for(i = 0; i < classes; ++i){
|
||||
if(class < 0 || class == i){
|
||||
int result = predictions[i*2] > predictions[i*2+1];
|
||||
for (i = 0; i < classes; ++i) {
|
||||
if (nclass < 0 || nclass == i) {
|
||||
int result = predictions[i * 2] > predictions[i * 2 + 1];
|
||||
bbox_update(a, b, i, result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
free_image(im1);
|
||||
free_image(im2);
|
||||
free(X);
|
||||
}
|
||||
|
||||
void SortMaster3000(char *filename, char *weightfile)
|
||||
{
|
||||
void SortMaster3000(char *filename, char *weightfile) {
|
||||
int i = 0;
|
||||
network net = parse_network_cfg(filename);
|
||||
if(weightfile){
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
srand(time(0));
|
||||
|
@ -236,31 +231,30 @@ void SortMaster3000(char *filename, char *weightfile)
|
|||
|
||||
list *plist = get_paths("data/compare.sort.list");
|
||||
//list *plist = get_paths("data/compare.val.old");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
int N = plist->size;
|
||||
free_list(plist);
|
||||
sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
|
||||
sortable_bbox *boxes = (sortable_bbox *) calloc(N, sizeof(sortable_bbox));
|
||||
printf("Sorting %d boxes...\n", N);
|
||||
for(i = 0; i < N; ++i){
|
||||
for (i = 0; i < N; ++i) {
|
||||
boxes[i].filename = paths[i];
|
||||
boxes[i].net = net;
|
||||
boxes[i].class = 7;
|
||||
boxes[i].nclass = 7;
|
||||
boxes[i].elo = 1500;
|
||||
}
|
||||
clock_t time=clock();
|
||||
clock_t time = clock();
|
||||
qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator);
|
||||
for(i = 0; i < N; ++i){
|
||||
for (i = 0; i < N; ++i) {
|
||||
printf("%s\n", boxes[i].filename);
|
||||
}
|
||||
printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time));
|
||||
printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock() - time));
|
||||
}
|
||||
|
||||
void BattleRoyaleWithCheese(char *filename, char *weightfile)
|
||||
{
|
||||
void BattleRoyaleWithCheese(char *filename, char *weightfile) {
|
||||
int classes = 20;
|
||||
int i,j;
|
||||
int i, j;
|
||||
network net = parse_network_cfg(filename);
|
||||
if(weightfile){
|
||||
if (weightfile) {
|
||||
load_weights(&net, weightfile);
|
||||
}
|
||||
srand(time(0));
|
||||
|
@ -270,69 +264,68 @@ void BattleRoyaleWithCheese(char *filename, char *weightfile)
|
|||
//list *plist = get_paths("data/compare.small.list");
|
||||
//list *plist = get_paths("data/compare.cat.list");
|
||||
//list *plist = get_paths("data/compare.val.old");
|
||||
char **paths = (char **)list_to_array(plist);
|
||||
char **paths = (char **) list_to_array(plist);
|
||||
int N = plist->size;
|
||||
int total = N;
|
||||
free_list(plist);
|
||||
sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
|
||||
sortable_bbox *boxes = (sortable_bbox *) calloc(N, sizeof(sortable_bbox));
|
||||
printf("Battling %d boxes...\n", N);
|
||||
for(i = 0; i < N; ++i){
|
||||
for (i = 0; i < N; ++i) {
|
||||
boxes[i].filename = paths[i];
|
||||
boxes[i].net = net;
|
||||
boxes[i].classes = classes;
|
||||
boxes[i].elos = calloc(classes, sizeof(float));;
|
||||
for(j = 0; j < classes; ++j){
|
||||
boxes[i].elos = (float *) calloc(classes, sizeof(float));;
|
||||
for (j = 0; j < classes; ++j) {
|
||||
boxes[i].elos[j] = 1500;
|
||||
}
|
||||
}
|
||||
int round;
|
||||
clock_t time=clock();
|
||||
for(round = 1; round <= 4; ++round){
|
||||
clock_t round_time=clock();
|
||||
clock_t time = clock();
|
||||
for (round = 1; round <= 4; ++round) {
|
||||
clock_t round_time = clock();
|
||||
printf("Round: %d\n", round);
|
||||
shuffle(boxes, N, sizeof(sortable_bbox));
|
||||
for(i = 0; i < N/2; ++i){
|
||||
bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1);
|
||||
for (i = 0; i < N / 2; ++i) {
|
||||
bbox_fight(net, boxes + i * 2, boxes + i * 2 + 1, classes, -1);
|
||||
}
|
||||
printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N);
|
||||
printf("Round: %f secs, %d remaining\n", sec(clock() - round_time), N);
|
||||
}
|
||||
|
||||
int class;
|
||||
int nclass;
|
||||
|
||||
for (class = 0; class < classes; ++class){
|
||||
for (nclass = 0; nclass < classes; ++nclass) {
|
||||
|
||||
N = total;
|
||||
current_class = class;
|
||||
current_class = nclass;
|
||||
qsort(boxes, N, sizeof(sortable_bbox), elo_comparator);
|
||||
N /= 2;
|
||||
|
||||
for(round = 1; round <= 100; ++round){
|
||||
clock_t round_time=clock();
|
||||
for (round = 1; round <= 100; ++round) {
|
||||
clock_t round_time = clock();
|
||||
printf("Round: %d\n", round);
|
||||
|
||||
sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10);
|
||||
for(i = 0; i < N/2; ++i){
|
||||
bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class);
|
||||
for (i = 0; i < N / 2; ++i) {
|
||||
bbox_fight(net, boxes + i * 2, boxes + i * 2 + 1, classes, nclass);
|
||||
}
|
||||
qsort(boxes, N, sizeof(sortable_bbox), elo_comparator);
|
||||
if(round <= 20) N = (N*9/10)/2*2;
|
||||
if (round <= 20) N = (N * 9 / 10) / 2 * 2;
|
||||
|
||||
printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N);
|
||||
printf("Round: %f secs, %d remaining\n", sec(clock() - round_time), N);
|
||||
}
|
||||
char buff[256];
|
||||
sprintf(buff, "results/battle_%d.log", class);
|
||||
sprintf(buff, "results/battle_%d.log", nclass);
|
||||
FILE *outfp = fopen(buff, "w");
|
||||
for(i = 0; i < N; ++i){
|
||||
fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class]);
|
||||
for (i = 0; i < N; ++i) {
|
||||
fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[nclass]);
|
||||
}
|
||||
fclose(outfp);
|
||||
}
|
||||
printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time));
|
||||
printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock() - time));
|
||||
}
|
||||
|
||||
void run_compare(int argc, char **argv)
|
||||
{
|
||||
if(argc < 4){
|
||||
void run_compare(int argc, char **argv) {
|
||||
if (argc < 4) {
|
||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||
return;
|
||||
}
|
||||
|
@ -340,10 +333,10 @@ void run_compare(int argc, char **argv)
|
|||
char *cfg = argv[3];
|
||||
char *weights = (argc > 4) ? argv[4] : 0;
|
||||
//char *filename = (argc > 5) ? argv[5]: 0;
|
||||
if(0==strcmp(argv[2], "train")) train_compare(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights);
|
||||
if (0 == strcmp(argv[2], "train")) train_compare(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "valid")) validate_compare(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "sort")) SortMaster3000(cfg, weights);
|
||||
else if (0 == strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights);
|
||||
/*
|
||||
else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
|
||||
else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights);
|
|
@ -2,7 +2,7 @@
|
|||
#include "convolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
|
||||
|
@ -11,16 +11,19 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam)
|
||||
{
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam) {
|
||||
int i;
|
||||
layer l = {0};
|
||||
layer l = {(LAYER_TYPE) 0};
|
||||
l.learning_rate_scale = 1;
|
||||
l.type = CONNECTED;
|
||||
|
||||
l.inputs = inputs;
|
||||
l.outputs = outputs;
|
||||
l.batch=batch;
|
||||
l.batch = batch;
|
||||
l.batch_normalize = batch_normalize;
|
||||
l.h = 1;
|
||||
l.w = 1;
|
||||
|
@ -29,54 +32,54 @@ layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activa
|
|||
l.out_w = 1;
|
||||
l.out_c = outputs;
|
||||
|
||||
l.output = calloc(batch*outputs, sizeof(float));
|
||||
l.delta = calloc(batch*outputs, sizeof(float));
|
||||
l.output = (float *) calloc(batch * outputs, sizeof(float));
|
||||
l.delta = (float*) calloc(batch * outputs, sizeof(float));
|
||||
|
||||
l.weight_updates = calloc(inputs*outputs, sizeof(float));
|
||||
l.bias_updates = calloc(outputs, sizeof(float));
|
||||
l.weight_updates = (float*) calloc(inputs * outputs, sizeof(float));
|
||||
l.bias_updates = (float*) calloc(outputs, sizeof(float));
|
||||
|
||||
l.weights = calloc(outputs*inputs, sizeof(float));
|
||||
l.biases = calloc(outputs, sizeof(float));
|
||||
l.weights = (float*) calloc(outputs * inputs, sizeof(float));
|
||||
l.biases = (float*) calloc(outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_connected_layer;
|
||||
l.backward = backward_connected_layer;
|
||||
l.update = update_connected_layer;
|
||||
|
||||
//float scale = 1./sqrt(inputs);
|
||||
float scale = sqrt(2./inputs);
|
||||
for(i = 0; i < outputs*inputs; ++i){
|
||||
l.weights[i] = scale*rand_uniform(-1, 1);
|
||||
float scale = sqrt(2. / inputs);
|
||||
for (i = 0; i < outputs * inputs; ++i) {
|
||||
l.weights[i] = scale * rand_uniform(-1, 1);
|
||||
}
|
||||
|
||||
for(i = 0; i < outputs; ++i){
|
||||
for (i = 0; i < outputs; ++i) {
|
||||
l.biases[i] = 0;
|
||||
}
|
||||
|
||||
if(adam){
|
||||
l.m = calloc(l.inputs*l.outputs, sizeof(float));
|
||||
l.v = calloc(l.inputs*l.outputs, sizeof(float));
|
||||
l.bias_m = calloc(l.outputs, sizeof(float));
|
||||
l.scale_m = calloc(l.outputs, sizeof(float));
|
||||
l.bias_v = calloc(l.outputs, sizeof(float));
|
||||
l.scale_v = calloc(l.outputs, sizeof(float));
|
||||
if (adam) {
|
||||
l.m = (float *) calloc(l.inputs * l.outputs, sizeof(float));
|
||||
l.v = (float *) calloc(l.inputs * l.outputs, sizeof(float));
|
||||
l.bias_m = (float *) calloc(l.outputs, sizeof(float));
|
||||
l.scale_m = (float *) calloc(l.outputs, sizeof(float));
|
||||
l.bias_v = (float *) calloc(l.outputs, sizeof(float));
|
||||
l.scale_v = (float *) calloc(l.outputs, sizeof(float));
|
||||
}
|
||||
if(batch_normalize){
|
||||
l.scales = calloc(outputs, sizeof(float));
|
||||
l.scale_updates = calloc(outputs, sizeof(float));
|
||||
for(i = 0; i < outputs; ++i){
|
||||
if (batch_normalize) {
|
||||
l.scales = (float *) calloc(outputs, sizeof(float));
|
||||
l.scale_updates = (float *) calloc(outputs, sizeof(float));
|
||||
for (i = 0; i < outputs; ++i) {
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.mean = calloc(outputs, sizeof(float));
|
||||
l.mean_delta = calloc(outputs, sizeof(float));
|
||||
l.variance = calloc(outputs, sizeof(float));
|
||||
l.variance_delta = calloc(outputs, sizeof(float));
|
||||
l.mean = (float *) calloc(outputs, sizeof(float));
|
||||
l.mean_delta = (float *) calloc(outputs, sizeof(float));
|
||||
l.variance = (float *) calloc(outputs, sizeof(float));
|
||||
l.variance_delta = (float *) calloc(outputs, sizeof(float));
|
||||
|
||||
l.rolling_mean = calloc(outputs, sizeof(float));
|
||||
l.rolling_variance = calloc(outputs, sizeof(float));
|
||||
l.rolling_mean = (float *) calloc(outputs, sizeof(float));
|
||||
l.rolling_variance = (float *) calloc(outputs, sizeof(float));
|
||||
|
||||
l.x = calloc(batch*outputs, sizeof(float));
|
||||
l.x_norm = calloc(batch*outputs, sizeof(float));
|
||||
l.x = (float *) calloc(batch * outputs, sizeof(float));
|
||||
l.x_norm = (float *) calloc(batch * outputs, sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
@ -117,10 +120,10 @@ layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activa
|
|||
l.x_gpu = cuda_make_array(l.output, l.batch*outputs);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs);
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
hipdnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
hipdnnSetTensor4dDescriptor(l.dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.normTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
@ -129,48 +132,45 @@ layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activa
|
|||
return l;
|
||||
}
|
||||
|
||||
void update_connected_layer(layer l, update_args a)
|
||||
{
|
||||
float learning_rate = a.learning_rate*l.learning_rate_scale;
|
||||
void update_connected_layer(layer l, update_args a) {
|
||||
float learning_rate = a.learning_rate * l.learning_rate_scale;
|
||||
float momentum = a.momentum;
|
||||
float decay = a.decay;
|
||||
int batch = a.batch;
|
||||
axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
|
||||
axpy_cpu(l.outputs, learning_rate / batch, l.bias_updates, 1, l.biases, 1);
|
||||
scal_cpu(l.outputs, momentum, l.bias_updates, 1);
|
||||
|
||||
if(l.batch_normalize){
|
||||
axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
|
||||
if (l.batch_normalize) {
|
||||
axpy_cpu(l.outputs, learning_rate / batch, l.scale_updates, 1, l.scales, 1);
|
||||
scal_cpu(l.outputs, momentum, l.scale_updates, 1);
|
||||
}
|
||||
|
||||
axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
|
||||
scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1);
|
||||
axpy_cpu(l.inputs * l.outputs, -decay * batch, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(l.inputs * l.outputs, learning_rate / batch, l.weight_updates, 1, l.weights, 1);
|
||||
scal_cpu(l.inputs * l.outputs, momentum, l.weight_updates, 1);
|
||||
}
|
||||
|
||||
void forward_connected_layer(layer l, network net)
|
||||
{
|
||||
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
|
||||
void forward_connected_layer(layer l, network net) {
|
||||
fill_cpu(l.outputs * l.batch, 0, l.output, 1);
|
||||
int m = l.batch;
|
||||
int k = l.inputs;
|
||||
int n = l.outputs;
|
||||
float *a = net.input;
|
||||
float *b = l.weights;
|
||||
float *c = l.output;
|
||||
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
|
||||
if(l.batch_normalize){
|
||||
gemm(0, 1, m, n, k, 1, a, k, b, k, 1, c, n);
|
||||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
add_bias(l.output, l.biases, l.batch, l.outputs, 1);
|
||||
}
|
||||
activate_array(l.output, l.outputs*l.batch, l.activation);
|
||||
activate_array(l.output, l.outputs * l.batch, l.activation);
|
||||
}
|
||||
|
||||
void backward_connected_layer(layer l, network net)
|
||||
{
|
||||
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
|
||||
void backward_connected_layer(layer l, network net) {
|
||||
gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta);
|
||||
|
||||
if(l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
backward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1);
|
||||
|
@ -182,7 +182,7 @@ void backward_connected_layer(layer l, network net)
|
|||
float *a = l.delta;
|
||||
float *b = net.input;
|
||||
float *c = l.weight_updates;
|
||||
gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
|
||||
gemm(1, 0, m, n, k, 1, a, m, b, n, 1, c, n);
|
||||
|
||||
m = l.batch;
|
||||
k = l.outputs;
|
||||
|
@ -192,17 +192,16 @@ void backward_connected_layer(layer l, network net)
|
|||
b = l.weights;
|
||||
c = net.delta;
|
||||
|
||||
if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
if (c) gemm(0, 0, m, n, k, 1, a, k, b, n, 1, c, n);
|
||||
}
|
||||
|
||||
|
||||
void denormalize_connected_layer(layer l)
|
||||
{
|
||||
void denormalize_connected_layer(layer l) {
|
||||
int i, j;
|
||||
for(i = 0; i < l.outputs; ++i){
|
||||
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001);
|
||||
for(j = 0; j < l.inputs; ++j){
|
||||
l.weights[i*l.inputs + j] *= scale;
|
||||
for (i = 0; i < l.outputs; ++i) {
|
||||
float scale = l.scales[i] / sqrt(l.rolling_variance[i] + .000001);
|
||||
for (j = 0; j < l.inputs; ++j) {
|
||||
l.weights[i * l.inputs + j] *= scale;
|
||||
}
|
||||
l.biases[i] -= l.rolling_mean[i] * scale;
|
||||
l.scales[i] = 1;
|
||||
|
@ -212,9 +211,8 @@ void denormalize_connected_layer(layer l)
|
|||
}
|
||||
|
||||
|
||||
void statistics_connected_layer(layer l)
|
||||
{
|
||||
if(l.batch_normalize){
|
||||
void statistics_connected_layer(layer l) {
|
||||
if (l.batch_normalize) {
|
||||
printf("Scales ");
|
||||
print_statistics(l.scales, l.outputs);
|
||||
/*
|
|
@ -8,7 +8,9 @@
|
|||
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam);
|
||||
|
||||
void forward_connected_layer(layer l, network net);
|
||||
|
||||
void backward_connected_layer(layer l, network net);
|
||||
|
||||
void update_connected_layer(layer l, update_args a);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "convolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "gemm.h"
|
||||
|
@ -11,83 +12,78 @@ extern "C" {
|
|||
#include "col2im.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
__global__ void binarize_kernel(float *x, int n, float *binary)
|
||||
{
|
||||
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
//}
|
||||
#define BLOCK 512
|
||||
|
||||
__global__ void binarize_kernel(float *x, int n, float *binary) {
|
||||
int i = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (i >= n) return;
|
||||
binary[i] = (x[i] >= 0) ? 1 : -1;
|
||||
}
|
||||
|
||||
void binarize_gpu(float *x, int n, float *binary)
|
||||
{
|
||||
void binarize_gpu(float *x, int n, float *binary) {
|
||||
binarize_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, binary);
|
||||
check_error(cudaPeekAtLastError());
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
__global__ void binarize_input_kernel(float *input, int n, int size, float *binary)
|
||||
{
|
||||
int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) {
|
||||
int s = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (s >= size) return;
|
||||
int i = 0;
|
||||
float mean = 0;
|
||||
for(i = 0; i < n; ++i){
|
||||
mean += fabsf(input[i*size + s]);
|
||||
for (i = 0; i < n; ++i) {
|
||||
mean += fabsf(input[i * size + s]);
|
||||
}
|
||||
mean = mean / n;
|
||||
for(i = 0; i < n; ++i){
|
||||
binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean;
|
||||
for (i = 0; i < n; ++i) {
|
||||
binary[i * size + s] = (input[i * size + s] > 0) ? mean : -mean;
|
||||
}
|
||||
}
|
||||
|
||||
void binarize_input_gpu(float *input, int n, int size, float *binary)
|
||||
{
|
||||
void binarize_input_gpu(float *input, int n, int size, float *binary) {
|
||||
binarize_input_kernel<<<cuda_gridsize(size), BLOCK>>>(input, n, size, binary);
|
||||
check_error(cudaPeekAtLastError());
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
|
||||
__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary)
|
||||
{
|
||||
int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) {
|
||||
int f = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (f >= n) return;
|
||||
int i = 0;
|
||||
float mean = 0;
|
||||
for(i = 0; i < size; ++i){
|
||||
mean += fabsf(weights[f*size + i]);
|
||||
for (i = 0; i < size; ++i) {
|
||||
mean += fabsf(weights[f * size + i]);
|
||||
}
|
||||
mean = mean / size;
|
||||
for(i = 0; i < size; ++i){
|
||||
binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean;
|
||||
for (i = 0; i < size; ++i) {
|
||||
binary[f * size + i] = (weights[f * size + i] > 0) ? mean : -mean;
|
||||
//binary[f*size + i] = weights[f*size + i];
|
||||
}
|
||||
}
|
||||
|
||||
void binarize_weights_gpu(float *weights, int n, int size, float *binary)
|
||||
{
|
||||
void binarize_weights_gpu(float *weights, int n, int size, float *binary) {
|
||||
binarize_weights_kernel<<<cuda_gridsize(n), BLOCK>>>(weights, n, size, binary);
|
||||
check_error(cudaPeekAtLastError());
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
void forward_convolutional_layer_gpu(convolutional_layer l, network net)
|
||||
{
|
||||
fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);
|
||||
if(l.binary){
|
||||
binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu);
|
||||
void forward_convolutional_layer_gpu(convolutional_layer l, network net) {
|
||||
fill_gpu(l.outputs * l.batch, 0, l.output_gpu, 1);
|
||||
if (l.binary) {
|
||||
binarize_weights_gpu(l.weights_gpu, l.n, l.c / l.groups * l.size * l.size, l.binary_weights_gpu);
|
||||
swap_binary(&l);
|
||||
}
|
||||
|
||||
if(l.xnor){
|
||||
binarize_weights_gpu(l.weights_gpu, l.n, l.c/l.groups*l.size*l.size, l.binary_weights_gpu);
|
||||
if (l.xnor) {
|
||||
binarize_weights_gpu(l.weights_gpu, l.n, l.c / l.groups * l.size * l.size, l.binary_weights_gpu);
|
||||
swap_binary(&l);
|
||||
binarize_gpu(net.input_gpu, l.c*l.h*l.w*l.batch, l.binary_input_gpu);
|
||||
binarize_gpu(net.input_gpu, l.c * l.h * l.w * l.batch, l.binary_input_gpu);
|
||||
net.input_gpu = l.binary_input_gpu;
|
||||
}
|
||||
|
||||
#ifdef CUDNN
|
||||
float one = 1;
|
||||
cudnnConvolutionForward(cudnn_handle(),
|
||||
hipdnnConvolutionForward(cudnn_handle(),
|
||||
&one,
|
||||
l.srcTensorDesc,
|
||||
net.input_gpu,
|
||||
|
@ -103,22 +99,22 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network net)
|
|||
|
||||
#else
|
||||
int i, j;
|
||||
int m = l.n/l.groups;
|
||||
int k = l.size*l.size*l.c/l.groups;
|
||||
int n = l.out_w*l.out_h;
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
for(j = 0; j < l.groups; ++j){
|
||||
float *a = l.weights_gpu + j*l.nweights/l.groups;
|
||||
int m = l.n / l.groups;
|
||||
int k = l.size * l.size * l.c / l.groups;
|
||||
int n = l.out_w * l.out_h;
|
||||
for (i = 0; i < l.batch; ++i) {
|
||||
for (j = 0; j < l.groups; ++j) {
|
||||
float *a = l.weights_gpu + j * l.nweights / l.groups;
|
||||
float *b = net.workspace;
|
||||
float *c = l.output_gpu + (i*l.groups + j)*n*m;
|
||||
float *im = net.input_gpu + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
|
||||
float *c = l.output_gpu + (i * l.groups + j) * n * m;
|
||||
float *im = net.input_gpu + (i * l.groups + j) * l.c / l.groups * l.h * l.w;
|
||||
|
||||
if (l.size == 1){
|
||||
if (l.size == 1) {
|
||||
b = im;
|
||||
} else {
|
||||
im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
|
||||
im2col_gpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
|
||||
}
|
||||
gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
gemm_gpu(0, 0, m, n, k, 1, a, k, b, n, 1, c, n);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -126,18 +122,17 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network net)
|
|||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer_gpu(l, net);
|
||||
} else {
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w * l.out_h);
|
||||
}
|
||||
|
||||
activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
|
||||
activate_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation);
|
||||
//if(l.dot > 0) dot_error_gpu(l);
|
||||
if(l.binary || l.xnor) swap_binary(&l);
|
||||
if (l.binary || l.xnor) swap_binary(&l);
|
||||
}
|
||||
|
||||
__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id >= n) return;
|
||||
__global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, float rate, float *delta) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id >= n) return;
|
||||
|
||||
int j = id % w;
|
||||
id /= w;
|
||||
|
@ -147,55 +142,53 @@ __global__ void smooth_kernel(float *x, int n, int w, int h, int c, int size, fl
|
|||
id /= c;
|
||||
int b = id;
|
||||
|
||||
int w_offset = -(size/2.f);
|
||||
int h_offset = -(size/2.f);
|
||||
int w_offset = -(size / 2.f);
|
||||
int h_offset = -(size / 2.f);
|
||||
|
||||
int out_index = j + w*(i + h*(k + c*b));
|
||||
int out_index = j + w * (i + h * (k + c * b));
|
||||
int l, m;
|
||||
for(l = 0; l < size; ++l){
|
||||
for(m = 0; m < size; ++m){
|
||||
for (l = 0; l < size; ++l) {
|
||||
for (m = 0; m < size; ++m) {
|
||||
int cur_h = h_offset + i + l;
|
||||
int cur_w = w_offset + j + m;
|
||||
int index = cur_w + w*(cur_h + h*(k + b*c));
|
||||
int index = cur_w + w * (cur_h + h * (k + b * c));
|
||||
int valid = (cur_h >= 0 && cur_h < h &&
|
||||
cur_w >= 0 && cur_w < w);
|
||||
delta[out_index] += valid ? rate*(x[index] - x[out_index]) : 0;
|
||||
cur_w >= 0 && cur_w < w);
|
||||
delta[out_index] += valid ? rate * (x[index] - x[out_index]) : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void smooth_layer(layer l, int size, float rate)
|
||||
{
|
||||
void smooth_layer(layer l, int size, float rate) {
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.out_c;
|
||||
|
||||
size_t n = h*w*c*l.batch;
|
||||
size_t n = h * w * c * l.batch;
|
||||
|
||||
smooth_kernel<<<cuda_gridsize(n), BLOCK>>>(l.output_gpu, n, l.w, l.h, l.c, size, rate, l.delta_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
void backward_convolutional_layer_gpu(convolutional_layer l, network net)
|
||||
{
|
||||
if(l.smooth){
|
||||
void backward_convolutional_layer_gpu(convolutional_layer l, network net) {
|
||||
if (l.smooth) {
|
||||
smooth_layer(l, 5, l.smooth);
|
||||
}
|
||||
//constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
|
||||
gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
|
||||
gradient_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu);
|
||||
|
||||
|
||||
if(l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
backward_batchnorm_layer_gpu(l, net);
|
||||
} else {
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w * l.out_h);
|
||||
}
|
||||
float *original_input = net.input_gpu;
|
||||
|
||||
if(l.xnor) net.input_gpu = l.binary_input_gpu;
|
||||
if (l.xnor) net.input_gpu = l.binary_input_gpu;
|
||||
#ifdef CUDNN
|
||||
float one = 1;
|
||||
cudnnConvolutionBackwardFilter(cudnn_handle(),
|
||||
hipdnnConvolutionBackwardFilter(cudnn_handle(),
|
||||
&one,
|
||||
l.srcTensorDesc,
|
||||
net.input_gpu,
|
||||
|
@ -211,7 +204,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network net)
|
|||
|
||||
if(net.delta_gpu){
|
||||
if(l.binary || l.xnor) swap_binary(&l);
|
||||
cudnnConvolutionBackwardData(cudnn_handle(),
|
||||
hipdnnConvolutionBackwardData(cudnn_handle(),
|
||||
&one,
|
||||
l.weightDesc,
|
||||
l.weights_gpu,
|
||||
|
@ -229,100 +222,102 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network net)
|
|||
}
|
||||
|
||||
#else
|
||||
int m = l.n/l.groups;
|
||||
int n = l.size*l.size*l.c/l.groups;
|
||||
int k = l.out_w*l.out_h;
|
||||
int m = l.n / l.groups;
|
||||
int n = l.size * l.size * l.c / l.groups;
|
||||
int k = l.out_w * l.out_h;
|
||||
|
||||
int i, j;
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
for(j = 0; j < l.groups; ++j){
|
||||
float *a = l.delta_gpu + (i*l.groups + j)*m*k;
|
||||
for (i = 0; i < l.batch; ++i) {
|
||||
for (j = 0; j < l.groups; ++j) {
|
||||
float *a = l.delta_gpu + (i * l.groups + j) * m * k;
|
||||
float *b = net.workspace;
|
||||
float *c = l.weight_updates_gpu + j*l.nweights/l.groups;
|
||||
float *c = l.weight_updates_gpu + j * l.nweights / l.groups;
|
||||
|
||||
float *im = net.input_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w;
|
||||
float *imd = net.delta_gpu+(i*l.groups + j)*l.c/l.groups*l.h*l.w;
|
||||
float *im = net.input_gpu + (i * l.groups + j) * l.c / l.groups * l.h * l.w;
|
||||
float *imd = net.delta_gpu + (i * l.groups + j) * l.c / l.groups * l.h * l.w;
|
||||
|
||||
im2col_gpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
|
||||
gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
|
||||
im2col_gpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
|
||||
gemm_gpu(0, 1, m, n, k, 1, a, k, b, k, 1, c, n);
|
||||
|
||||
if (net.delta_gpu) {
|
||||
if (l.binary || l.xnor) swap_binary(&l);
|
||||
a = l.weights_gpu + j*l.nweights/l.groups;
|
||||
b = l.delta_gpu + (i*l.groups + j)*m*k;
|
||||
a = l.weights_gpu + j * l.nweights / l.groups;
|
||||
b = l.delta_gpu + (i * l.groups + j) * m * k;
|
||||
c = net.workspace;
|
||||
if (l.size == 1) {
|
||||
c = imd;
|
||||
}
|
||||
|
||||
gemm_gpu(1,0,n,k,m,1,a,n,b,k,0,c,k);
|
||||
gemm_gpu(1, 0, n, k, m, 1, a, n, b, k, 0, c, k);
|
||||
|
||||
if (l.size != 1) {
|
||||
col2im_gpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
|
||||
col2im_gpu(net.workspace, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
|
||||
}
|
||||
if(l.binary || l.xnor) {
|
||||
if (l.binary || l.xnor) {
|
||||
swap_binary(&l);
|
||||
}
|
||||
}
|
||||
if(l.xnor) gradient_array_gpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, net.delta_gpu + i*l.c*l.h*l.w);
|
||||
if (l.xnor)
|
||||
gradient_array_gpu(original_input + i * l.c * l.h * l.w, l.c * l.h * l.w, HARDTAN,
|
||||
net.delta_gpu + i * l.c * l.h * l.w);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void pull_convolutional_layer(layer l)
|
||||
{
|
||||
void pull_convolutional_layer(layer l) {
|
||||
cuda_pull_array(l.weights_gpu, l.weights, l.nweights);
|
||||
cuda_pull_array(l.biases_gpu, l.biases, l.n);
|
||||
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
|
||||
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||
if (l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
cuda_pull_array(l.scales_gpu, l.scales, l.n);
|
||||
cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
|
||||
cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
|
||||
}
|
||||
}
|
||||
|
||||
void push_convolutional_layer(layer l)
|
||||
{
|
||||
void push_convolutional_layer(layer l) {
|
||||
cuda_push_array(l.weights_gpu, l.weights, l.nweights);
|
||||
cuda_push_array(l.biases_gpu, l.biases, l.n);
|
||||
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
|
||||
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||
if (l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
cuda_push_array(l.scales_gpu, l.scales, l.n);
|
||||
cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
|
||||
cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
|
||||
}
|
||||
}
|
||||
|
||||
void update_convolutional_layer_gpu(layer l, update_args a)
|
||||
{
|
||||
float learning_rate = a.learning_rate*l.learning_rate_scale;
|
||||
void update_convolutional_layer_gpu(layer l, update_args a) {
|
||||
float learning_rate = a.learning_rate * l.learning_rate_scale;
|
||||
float momentum = a.momentum;
|
||||
float decay = a.decay;
|
||||
int batch = a.batch;
|
||||
|
||||
if(a.adam){
|
||||
adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t);
|
||||
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t);
|
||||
if(l.scales_gpu){
|
||||
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t);
|
||||
if (a.adam) {
|
||||
adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate,
|
||||
l.nweights, batch, a.t);
|
||||
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay,
|
||||
learning_rate, l.n, batch, a.t);
|
||||
if (l.scales_gpu) {
|
||||
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay,
|
||||
learning_rate, l.n, batch, a.t);
|
||||
}
|
||||
}else{
|
||||
axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
|
||||
axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
|
||||
} else {
|
||||
axpy_gpu(l.nweights, -decay * batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
|
||||
axpy_gpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
|
||||
scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1);
|
||||
|
||||
axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
|
||||
axpy_gpu(l.n, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
|
||||
scal_gpu(l.n, momentum, l.bias_updates_gpu, 1);
|
||||
|
||||
if(l.scales_gpu){
|
||||
axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
|
||||
if (l.scales_gpu) {
|
||||
axpy_gpu(l.n, learning_rate / batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
|
||||
scal_gpu(l.n, momentum, l.scale_updates_gpu, 1);
|
||||
}
|
||||
}
|
||||
if(l.clip){
|
||||
if (l.clip) {
|
||||
constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,622 +0,0 @@
|
|||
#include "convolutional_layer.h"
|
||||
#include "utils.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "im2col.h"
|
||||
#include "col2im.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef AI2
|
||||
#include "xnor_layer.h"
|
||||
#endif
|
||||
|
||||
void swap_binary(convolutional_layer *l)
|
||||
{
|
||||
float *swap = l->weights;
|
||||
l->weights = l->binary_weights;
|
||||
l->binary_weights = swap;
|
||||
|
||||
#ifdef GPU
|
||||
swap = l->weights_gpu;
|
||||
l->weights_gpu = l->binary_weights_gpu;
|
||||
l->binary_weights_gpu = swap;
|
||||
#endif
|
||||
}
|
||||
|
||||
void binarize_weights(float *weights, int n, int size, float *binary)
|
||||
{
|
||||
int i, f;
|
||||
for(f = 0; f < n; ++f){
|
||||
float mean = 0;
|
||||
for(i = 0; i < size; ++i){
|
||||
mean += fabs(weights[f*size + i]);
|
||||
}
|
||||
mean = mean / size;
|
||||
for(i = 0; i < size; ++i){
|
||||
binary[f*size + i] = (weights[f*size + i] > 0) ? mean : -mean;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void binarize_cpu(float *input, int n, float *binary)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < n; ++i){
|
||||
binary[i] = (input[i] > 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
void binarize_input(float *input, int n, int size, float *binary)
|
||||
{
|
||||
int i, s;
|
||||
for(s = 0; s < size; ++s){
|
||||
float mean = 0;
|
||||
for(i = 0; i < n; ++i){
|
||||
mean += fabs(input[i*size + s]);
|
||||
}
|
||||
mean = mean / n;
|
||||
for(i = 0; i < n; ++i){
|
||||
binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int convolutional_out_height(convolutional_layer l)
|
||||
{
|
||||
return (l.h + 2*l.pad - l.size) / l.stride + 1;
|
||||
}
|
||||
|
||||
int convolutional_out_width(convolutional_layer l)
|
||||
{
|
||||
return (l.w + 2*l.pad - l.size) / l.stride + 1;
|
||||
}
|
||||
|
||||
image get_convolutional_image(convolutional_layer l)
|
||||
{
|
||||
return float_to_image(l.out_w,l.out_h,l.out_c,l.output);
|
||||
}
|
||||
|
||||
image get_convolutional_delta(convolutional_layer l)
|
||||
{
|
||||
return float_to_image(l.out_w,l.out_h,l.out_c,l.delta);
|
||||
}
|
||||
|
||||
static size_t get_workspace_size(layer l){
|
||||
#ifdef CUDNN
|
||||
if(gpu_index >= 0){
|
||||
size_t most = 0;
|
||||
size_t s = 0;
|
||||
cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(),
|
||||
l.srcTensorDesc,
|
||||
l.weightDesc,
|
||||
l.convDesc,
|
||||
l.dstTensorDesc,
|
||||
l.fw_algo,
|
||||
&s);
|
||||
if (s > most) most = s;
|
||||
cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(),
|
||||
l.srcTensorDesc,
|
||||
l.ddstTensorDesc,
|
||||
l.convDesc,
|
||||
l.dweightDesc,
|
||||
l.bf_algo,
|
||||
&s);
|
||||
if (s > most) most = s;
|
||||
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(),
|
||||
l.weightDesc,
|
||||
l.ddstTensorDesc,
|
||||
l.convDesc,
|
||||
l.dsrcTensorDesc,
|
||||
l.bd_algo,
|
||||
&s);
|
||||
if (s > most) most = s;
|
||||
return most;
|
||||
}
|
||||
#endif
|
||||
return (size_t)l.out_h*l.out_w*l.size*l.size*l.c/l.groups*sizeof(float);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
#ifdef CUDNN
|
||||
void cudnn_convolutional_setup(layer *l)
|
||||
{
|
||||
cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
|
||||
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
|
||||
cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
|
||||
cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
|
||||
#if CUDNN_MAJOR >= 6
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
|
||||
#else
|
||||
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
|
||||
#endif
|
||||
|
||||
#if CUDNN_MAJOR >= 7
|
||||
cudnnSetConvolutionGroupCount(l->convDesc, l->groups);
|
||||
#else
|
||||
if(l->groups > 1){
|
||||
error("CUDNN < 7 doesn't support groups, please upgrade!");
|
||||
}
|
||||
#endif
|
||||
|
||||
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
|
||||
l->srcTensorDesc,
|
||||
l->weightDesc,
|
||||
l->convDesc,
|
||||
l->dstTensorDesc,
|
||||
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
|
||||
2000000000,
|
||||
&l->fw_algo);
|
||||
cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
|
||||
l->weightDesc,
|
||||
l->ddstTensorDesc,
|
||||
l->convDesc,
|
||||
l->dsrcTensorDesc,
|
||||
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
|
||||
2000000000,
|
||||
&l->bd_algo);
|
||||
cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
|
||||
l->srcTensorDesc,
|
||||
l->ddstTensorDesc,
|
||||
l->convDesc,
|
||||
l->dweightDesc,
|
||||
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
|
||||
2000000000,
|
||||
&l->bf_algo);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
|
||||
{
|
||||
int i;
|
||||
convolutional_layer l = {0};
|
||||
l.type = CONVOLUTIONAL;
|
||||
|
||||
l.groups = groups;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.n = n;
|
||||
l.binary = binary;
|
||||
l.xnor = xnor;
|
||||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.size = size;
|
||||
l.pad = padding;
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
l.weights = calloc(c/groups*n*size*size, sizeof(float));
|
||||
l.weight_updates = calloc(c/groups*n*size*size, sizeof(float));
|
||||
|
||||
l.biases = calloc(n, sizeof(float));
|
||||
l.bias_updates = calloc(n, sizeof(float));
|
||||
|
||||
l.nweights = c/groups*n*size*size;
|
||||
l.nbiases = n;
|
||||
|
||||
// float scale = 1./sqrt(size*size*c);
|
||||
float scale = sqrt(2./(size*size*c/l.groups));
|
||||
//printf("convscale %f\n", scale);
|
||||
//scale = .02;
|
||||
//for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
|
||||
for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal();
|
||||
int out_w = convolutional_out_width(l);
|
||||
int out_h = convolutional_out_height(l);
|
||||
l.out_h = out_h;
|
||||
l.out_w = out_w;
|
||||
l.out_c = n;
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
|
||||
l.output = calloc(l.batch*l.outputs, sizeof(float));
|
||||
l.delta = calloc(l.batch*l.outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_convolutional_layer;
|
||||
l.backward = backward_convolutional_layer;
|
||||
l.update = update_convolutional_layer;
|
||||
if(binary){
|
||||
l.binary_weights = calloc(l.nweights, sizeof(float));
|
||||
l.cweights = calloc(l.nweights, sizeof(char));
|
||||
l.scales = calloc(n, sizeof(float));
|
||||
}
|
||||
if(xnor){
|
||||
l.binary_weights = calloc(l.nweights, sizeof(float));
|
||||
l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
|
||||
}
|
||||
|
||||
if(batch_normalize){
|
||||
l.scales = calloc(n, sizeof(float));
|
||||
l.scale_updates = calloc(n, sizeof(float));
|
||||
for(i = 0; i < n; ++i){
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.mean = calloc(n, sizeof(float));
|
||||
l.variance = calloc(n, sizeof(float));
|
||||
|
||||
l.mean_delta = calloc(n, sizeof(float));
|
||||
l.variance_delta = calloc(n, sizeof(float));
|
||||
|
||||
l.rolling_mean = calloc(n, sizeof(float));
|
||||
l.rolling_variance = calloc(n, sizeof(float));
|
||||
l.x = calloc(l.batch*l.outputs, sizeof(float));
|
||||
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
|
||||
}
|
||||
if(adam){
|
||||
l.m = calloc(l.nweights, sizeof(float));
|
||||
l.v = calloc(l.nweights, sizeof(float));
|
||||
l.bias_m = calloc(n, sizeof(float));
|
||||
l.scale_m = calloc(n, sizeof(float));
|
||||
l.bias_v = calloc(n, sizeof(float));
|
||||
l.scale_v = calloc(n, sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_convolutional_layer_gpu;
|
||||
l.backward_gpu = backward_convolutional_layer_gpu;
|
||||
l.update_gpu = update_convolutional_layer_gpu;
|
||||
|
||||
if(gpu_index >= 0){
|
||||
if (adam) {
|
||||
l.m_gpu = cuda_make_array(l.m, l.nweights);
|
||||
l.v_gpu = cuda_make_array(l.v, l.nweights);
|
||||
l.bias_m_gpu = cuda_make_array(l.bias_m, n);
|
||||
l.bias_v_gpu = cuda_make_array(l.bias_v, n);
|
||||
l.scale_m_gpu = cuda_make_array(l.scale_m, n);
|
||||
l.scale_v_gpu = cuda_make_array(l.scale_v, n);
|
||||
}
|
||||
|
||||
l.weights_gpu = cuda_make_array(l.weights, l.nweights);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights);
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
|
||||
if(binary){
|
||||
l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);
|
||||
}
|
||||
if(xnor){
|
||||
l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);
|
||||
l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
|
||||
}
|
||||
|
||||
if(batch_normalize){
|
||||
l.mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.rolling_mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.rolling_variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.mean_delta_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_delta_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
||||
|
||||
l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.srcTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnCreateFilterDescriptor(&l.weightDesc);
|
||||
cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
|
||||
cudnnCreateFilterDescriptor(&l.dweightDesc);
|
||||
cudnnCreateConvolutionDescriptor(&l.convDesc);
|
||||
cudnn_convolutional_setup(&l);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
l.activation = activation;
|
||||
|
||||
fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.);
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
void denormalize_convolutional_layer(convolutional_layer l)
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
|
||||
for(j = 0; j < l.c/l.groups*l.size*l.size; ++j){
|
||||
l.weights[i*l.c/l.groups*l.size*l.size + j] *= scale;
|
||||
}
|
||||
l.biases[i] -= l.rolling_mean[i] * scale;
|
||||
l.scales[i] = 1;
|
||||
l.rolling_mean[i] = 0;
|
||||
l.rolling_variance[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void test_convolutional_layer()
|
||||
{
|
||||
convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0);
|
||||
l.batch_normalize = 1;
|
||||
float data[] = {1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3};
|
||||
//net.input = data;
|
||||
//forward_convolutional_layer(l);
|
||||
}
|
||||
*/
|
||||
|
||||
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
|
||||
{
|
||||
l->w = w;
|
||||
l->h = h;
|
||||
int out_w = convolutional_out_width(*l);
|
||||
int out_h = convolutional_out_height(*l);
|
||||
|
||||
l->out_w = out_w;
|
||||
l->out_h = out_h;
|
||||
|
||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||
l->inputs = l->w * l->h * l->c;
|
||||
|
||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
||||
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
|
||||
if(l->batch_normalize){
|
||||
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
|
||||
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
cuda_free(l->delta_gpu);
|
||||
cuda_free(l->output_gpu);
|
||||
|
||||
l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
|
||||
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
|
||||
if(l->batch_normalize){
|
||||
cuda_free(l->x_gpu);
|
||||
cuda_free(l->x_norm_gpu);
|
||||
|
||||
l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnn_convolutional_setup(l);
|
||||
#endif
|
||||
#endif
|
||||
l->workspace_size = get_workspace_size(*l);
|
||||
}
|
||||
|
||||
void add_bias(float *output, float *biases, int batch, int n, int size)
|
||||
{
|
||||
int i,j,b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(i = 0; i < n; ++i){
|
||||
for(j = 0; j < size; ++j){
|
||||
output[(b*n + i)*size + j] += biases[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void scale_bias(float *output, float *scales, int batch, int n, int size)
|
||||
{
|
||||
int i,j,b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(i = 0; i < n; ++i){
|
||||
for(j = 0; j < size; ++j){
|
||||
output[(b*n + i)*size + j] *= scales[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
|
||||
{
|
||||
int i,b;
|
||||
for(b = 0; b < batch; ++b){
|
||||
for(i = 0; i < n; ++i){
|
||||
bias_updates[i] += sum_array(delta+size*(i+b*n), size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void forward_convolutional_layer(convolutional_layer l, network net)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
|
||||
|
||||
if(l.xnor){
|
||||
binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
|
||||
swap_binary(&l);
|
||||
binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
|
||||
net.input = l.binary_input;
|
||||
}
|
||||
|
||||
int m = l.n/l.groups;
|
||||
int k = l.size*l.size*l.c/l.groups;
|
||||
int n = l.out_w*l.out_h;
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
for(j = 0; j < l.groups; ++j){
|
||||
float *a = l.weights + j*l.nweights/l.groups;
|
||||
float *b = net.workspace;
|
||||
float *c = l.output + (i*l.groups + j)*n*m;
|
||||
float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
|
||||
|
||||
if (l.size == 1) {
|
||||
b = im;
|
||||
} else {
|
||||
im2col_cpu(im, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
|
||||
}
|
||||
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
}
|
||||
}
|
||||
|
||||
if(l.batch_normalize){
|
||||
forward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
|
||||
}
|
||||
|
||||
activate_array(l.output, l.outputs*l.batch, l.activation);
|
||||
if(l.binary || l.xnor) swap_binary(&l);
|
||||
}
|
||||
|
||||
void backward_convolutional_layer(convolutional_layer l, network net)
|
||||
{
|
||||
int i, j;
|
||||
int m = l.n/l.groups;
|
||||
int n = l.size*l.size*l.c/l.groups;
|
||||
int k = l.out_w*l.out_h;
|
||||
|
||||
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
|
||||
|
||||
if(l.batch_normalize){
|
||||
backward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
|
||||
}
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
for(j = 0; j < l.groups; ++j){
|
||||
float *a = l.delta + (i*l.groups + j)*m*k;
|
||||
float *b = net.workspace;
|
||||
float *c = l.weight_updates + j*l.nweights/l.groups;
|
||||
|
||||
float *im = net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
|
||||
float *imd = net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w;
|
||||
|
||||
if(l.size == 1){
|
||||
b = im;
|
||||
} else {
|
||||
im2col_cpu(im, l.c/l.groups, l.h, l.w,
|
||||
l.size, l.stride, l.pad, b);
|
||||
}
|
||||
|
||||
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
|
||||
|
||||
if (net.delta) {
|
||||
a = l.weights + j*l.nweights/l.groups;
|
||||
b = l.delta + (i*l.groups + j)*m*k;
|
||||
c = net.workspace;
|
||||
if (l.size == 1) {
|
||||
c = imd;
|
||||
}
|
||||
|
||||
gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
|
||||
|
||||
if (l.size != 1) {
|
||||
col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_convolutional_layer(convolutional_layer l, update_args a)
|
||||
{
|
||||
float learning_rate = a.learning_rate*l.learning_rate_scale;
|
||||
float momentum = a.momentum;
|
||||
float decay = a.decay;
|
||||
int batch = a.batch;
|
||||
|
||||
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
|
||||
scal_cpu(l.n, momentum, l.bias_updates, 1);
|
||||
|
||||
if(l.scales){
|
||||
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
|
||||
scal_cpu(l.n, momentum, l.scale_updates, 1);
|
||||
}
|
||||
|
||||
axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
|
||||
scal_cpu(l.nweights, momentum, l.weight_updates, 1);
|
||||
}
|
||||
|
||||
|
||||
image get_convolutional_weight(convolutional_layer l, int i)
|
||||
{
|
||||
int h = l.size;
|
||||
int w = l.size;
|
||||
int c = l.c/l.groups;
|
||||
return float_to_image(w,h,c,l.weights+i*h*w*c);
|
||||
}
|
||||
|
||||
void rgbgr_weights(convolutional_layer l)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
image im = get_convolutional_weight(l, i);
|
||||
if (im.c == 3) {
|
||||
rgbgr_image(im);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rescale_weights(convolutional_layer l, float scale, float trans)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
image im = get_convolutional_weight(l, i);
|
||||
if (im.c == 3) {
|
||||
scale_image(im, scale);
|
||||
float sum = sum_array(im.data, im.w*im.h*im.c);
|
||||
l.biases[i] += sum*trans;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
image *get_weights(convolutional_layer l)
|
||||
{
|
||||
image *weights = calloc(l.n, sizeof(image));
|
||||
int i;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
weights[i] = copy_image(get_convolutional_weight(l, i));
|
||||
normalize_image(weights[i]);
|
||||
/*
|
||||
char buff[256];
|
||||
sprintf(buff, "filter%d", i);
|
||||
save_image(weights[i], buff);
|
||||
*/
|
||||
}
|
||||
//error("hey");
|
||||
return weights;
|
||||
}
|
||||
|
||||
image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights)
|
||||
{
|
||||
image *single_weights = get_weights(l);
|
||||
show_images(single_weights, l.n, window);
|
||||
|
||||
image delta = get_convolutional_image(l);
|
||||
image dc = collapse_image_layers(delta, 1);
|
||||
char buff[256];
|
||||
sprintf(buff, "%s: Output", window);
|
||||
//show_image(dc, buff);
|
||||
//save_image(dc, buff);
|
||||
free_image(dc);
|
||||
return single_weights;
|
||||
}
|
||||
|
|
@ -0,0 +1,604 @@
|
|||
#include "convolutional_layer.h"
|
||||
#include "utils.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "im2col.h"
|
||||
#include "col2im.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef AI2
|
||||
#include "xnor_layer.h"
|
||||
#endif
|
||||
|
||||
void swap_binary(convolutional_layer *l) {
|
||||
float *swap = l->weights;
|
||||
l->weights = l->binary_weights;
|
||||
l->binary_weights = swap;
|
||||
|
||||
#ifdef GPU
|
||||
swap = l->weights_gpu;
|
||||
l->weights_gpu = l->binary_weights_gpu;
|
||||
l->binary_weights_gpu = swap;
|
||||
#endif
|
||||
}
|
||||
|
||||
void binarize_weights(float *weights, int n, int size, float *binary) {
|
||||
int i, f;
|
||||
for (f = 0; f < n; ++f) {
|
||||
float mean = 0;
|
||||
for (i = 0; i < size; ++i) {
|
||||
mean += fabs(weights[f * size + i]);
|
||||
}
|
||||
mean = mean / size;
|
||||
for (i = 0; i < size; ++i) {
|
||||
binary[f * size + i] = (weights[f * size + i] > 0) ? mean : -mean;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void binarize_cpu(float *input, int n, float *binary) {
|
||||
int i;
|
||||
for (i = 0; i < n; ++i) {
|
||||
binary[i] = (input[i] > 0) ? 1 : -1;
|
||||
}
|
||||
}
|
||||
|
||||
void binarize_input(float *input, int n, int size, float *binary) {
|
||||
int i, s;
|
||||
for (s = 0; s < size; ++s) {
|
||||
float mean = 0;
|
||||
for (i = 0; i < n; ++i) {
|
||||
mean += fabs(input[i * size + s]);
|
||||
}
|
||||
mean = mean / n;
|
||||
for (i = 0; i < n; ++i) {
|
||||
binary[i * size + s] = (input[i * size + s] > 0) ? mean : -mean;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int convolutional_out_height(convolutional_layer l) {
|
||||
return (l.h + 2 * l.pad - l.size) / l.stride + 1;
|
||||
}
|
||||
|
||||
int convolutional_out_width(convolutional_layer l) {
|
||||
return (l.w + 2 * l.pad - l.size) / l.stride + 1;
|
||||
}
|
||||
|
||||
image get_convolutional_image(convolutional_layer l) {
|
||||
return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
|
||||
}
|
||||
|
||||
image get_convolutional_delta(convolutional_layer l) {
|
||||
return float_to_image(l.out_w, l.out_h, l.out_c, l.delta);
|
||||
}
|
||||
|
||||
static size_t get_workspace_size(layer l) {
|
||||
#ifdef CUDNN
|
||||
if(gpu_index >= 0){
|
||||
size_t most = 0;
|
||||
size_t s = 0;
|
||||
hipdnnGetConvolutionForwardWorkspaceSize(cudnn_handle(),
|
||||
l.srcTensorDesc,
|
||||
l.weightDesc,
|
||||
l.convDesc,
|
||||
l.dstTensorDesc,
|
||||
l.fw_algo,
|
||||
&s);
|
||||
if (s > most) most = s;
|
||||
hipdnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(),
|
||||
l.srcTensorDesc,
|
||||
l.ddstTensorDesc,
|
||||
l.convDesc,
|
||||
l.dweightDesc,
|
||||
l.bf_algo,
|
||||
&s);
|
||||
if (s > most) most = s;
|
||||
hipdnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(),
|
||||
l.weightDesc,
|
||||
l.ddstTensorDesc,
|
||||
l.convDesc,
|
||||
l.dsrcTensorDesc,
|
||||
l.bd_algo,
|
||||
&s);
|
||||
if (s > most) most = s;
|
||||
return most;
|
||||
}
|
||||
#endif
|
||||
return (size_t) l.out_h * l.out_w * l.size * l.size * l.c / l.groups * sizeof(float);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
#ifdef CUDNN
|
||||
void cudnn_convolutional_setup(layer *l)
|
||||
{
|
||||
hipdnnSetTensor4dDescriptor(l->dsrcTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
hipdnnSetTensor4dDescriptor(l->ddstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
|
||||
hipdnnSetTensor4dDescriptor(l->srcTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
|
||||
hipdnnSetTensor4dDescriptor(l->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l->normTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
|
||||
hipdnnSetFilter4dDescriptor(l->dweightDesc, HIPDNN_DATA_FLOAT, HIPDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
|
||||
hipdnnSetFilter4dDescriptor(l->weightDesc, HIPDNN_DATA_FLOAT, HIPDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
|
||||
#if CUDNN_MAJOR >= 6
|
||||
hipdnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, HIPDNN_CROSS_CORRELATION, HIPDNN_DATA_FLOAT);
|
||||
#else
|
||||
hipdnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, HIPDNN_CROSS_CORRELATION);
|
||||
#endif
|
||||
|
||||
#if CUDNN_MAJOR >= 7
|
||||
hipdnnSetConvolutionGroupCount(l->convDesc, l->groups);
|
||||
#else
|
||||
if(l->groups > 1){
|
||||
error("CUDNN < 7 doesn't support groups, please upgrade!");
|
||||
}
|
||||
#endif
|
||||
|
||||
hipdnnGetConvolutionForwardAlgorithm(cudnn_handle(),
|
||||
l->srcTensorDesc,
|
||||
l->weightDesc,
|
||||
l->convDesc,
|
||||
l->dstTensorDesc,
|
||||
HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
|
||||
2000000000,
|
||||
&l->fw_algo);
|
||||
hipdnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
|
||||
l->weightDesc,
|
||||
l->ddstTensorDesc,
|
||||
l->convDesc,
|
||||
l->dsrcTensorDesc,
|
||||
HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
|
||||
2000000000,
|
||||
&l->bd_algo);
|
||||
hipdnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
|
||||
l->srcTensorDesc,
|
||||
l->ddstTensorDesc,
|
||||
l->convDesc,
|
||||
l->dweightDesc,
|
||||
HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
|
||||
2000000000,
|
||||
&l->bf_algo);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
convolutional_layer
|
||||
make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding,
|
||||
ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam) {
|
||||
int i;
|
||||
convolutional_layer l = {(LAYER_TYPE) 0};
|
||||
l.type = CONVOLUTIONAL;
|
||||
|
||||
l.groups = groups;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.n = n;
|
||||
l.binary = binary;
|
||||
l.xnor = xnor;
|
||||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.size = size;
|
||||
l.pad = padding;
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
l.weights = (float *) calloc(c / groups * n * size * size, sizeof(float));
|
||||
l.weight_updates = (float *) calloc(c / groups * n * size * size, sizeof(float));
|
||||
|
||||
l.biases = (float *) calloc(n, sizeof(float));
|
||||
l.bias_updates = (float *) calloc(n, sizeof(float));
|
||||
|
||||
l.nweights = c / groups * n * size * size;
|
||||
l.nbiases = n;
|
||||
|
||||
// float scale = 1./sqrt(size*size*c);
|
||||
float scale = sqrt(2. / (size * size * c / l.groups));
|
||||
//printf("convscale %f\n", scale);
|
||||
//scale = .02;
|
||||
//for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
|
||||
for (i = 0; i < l.nweights; ++i) l.weights[i] = scale * rand_normal();
|
||||
int out_w = convolutional_out_width(l);
|
||||
int out_h = convolutional_out_height(l);
|
||||
l.out_h = out_h;
|
||||
l.out_w = out_w;
|
||||
l.out_c = n;
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
|
||||
l.output = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
l.delta = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_convolutional_layer;
|
||||
l.backward = backward_convolutional_layer;
|
||||
l.update = update_convolutional_layer;
|
||||
if (binary) {
|
||||
l.binary_weights = (float *) calloc(l.nweights, sizeof(float));
|
||||
l.cweights = (char *) calloc(l.nweights, sizeof(char));
|
||||
l.scales = (float *) calloc(n, sizeof(float));
|
||||
}
|
||||
if (xnor) {
|
||||
l.binary_weights = (float *) calloc(l.nweights, sizeof(float));
|
||||
l.binary_input = (float *) calloc(l.inputs * l.batch, sizeof(float));
|
||||
}
|
||||
|
||||
if (batch_normalize) {
|
||||
l.scales = (float *) calloc(n, sizeof(float));
|
||||
l.scale_updates = (float *) calloc(n, sizeof(float));
|
||||
for (i = 0; i < n; ++i) {
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.mean = (float *) calloc(n, sizeof(float));
|
||||
l.variance = (float *) calloc(n, sizeof(float));
|
||||
|
||||
l.mean_delta = (float *) calloc(n, sizeof(float));
|
||||
l.variance_delta = (float *) calloc(n, sizeof(float));
|
||||
|
||||
l.rolling_mean = (float *) calloc(n, sizeof(float));
|
||||
l.rolling_variance = (float *) calloc(n, sizeof(float));
|
||||
l.x = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
l.x_norm = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
}
|
||||
if (adam) {
|
||||
l.m = (float *) calloc(l.nweights, sizeof(float));
|
||||
l.v = (float *) calloc(l.nweights, sizeof(float));
|
||||
l.bias_m = (float *) calloc(n, sizeof(float));
|
||||
l.scale_m = (float *) calloc(n, sizeof(float));
|
||||
l.bias_v = (float *) calloc(n, sizeof(float));
|
||||
l.scale_v = (float *) calloc(n, sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_convolutional_layer_gpu;
|
||||
l.backward_gpu = backward_convolutional_layer_gpu;
|
||||
l.update_gpu = update_convolutional_layer_gpu;
|
||||
|
||||
if(gpu_index >= 0){
|
||||
if (adam) {
|
||||
l.m_gpu = cuda_make_array(l.m, l.nweights);
|
||||
l.v_gpu = cuda_make_array(l.v, l.nweights);
|
||||
l.bias_m_gpu = cuda_make_array(l.bias_m, n);
|
||||
l.bias_v_gpu = cuda_make_array(l.bias_v, n);
|
||||
l.scale_m_gpu = cuda_make_array(l.scale_m, n);
|
||||
l.scale_v_gpu = cuda_make_array(l.scale_v, n);
|
||||
}
|
||||
|
||||
l.weights_gpu = cuda_make_array(l.weights, l.nweights);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights);
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
|
||||
if(binary){
|
||||
l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);
|
||||
}
|
||||
if(xnor){
|
||||
l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights);
|
||||
l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
|
||||
}
|
||||
|
||||
if(batch_normalize){
|
||||
l.mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.rolling_mean_gpu = cuda_make_array(l.mean, n);
|
||||
l.rolling_variance_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.mean_delta_gpu = cuda_make_array(l.mean, n);
|
||||
l.variance_delta_gpu = cuda_make_array(l.variance, n);
|
||||
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
|
||||
|
||||
l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
hipdnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.srcTensorDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
hipdnnCreateFilterDescriptor(&l.weightDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.dsrcTensorDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.ddstTensorDesc);
|
||||
hipdnnCreateFilterDescriptor(&l.dweightDesc);
|
||||
hipdnnCreateConvolutionDescriptor(&l.convDesc);
|
||||
cudnn_convolutional_setup(&l);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
l.activation = activation;
|
||||
|
||||
fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride,
|
||||
w, h, c, l.out_w, l.out_h, l.out_c,
|
||||
(2.0 * l.n * l.size * l.size * l.c / l.groups * l.out_h * l.out_w) / 1000000000.);
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
/*
 * Fold the batchnorm statistics into the raw weights and biases so the layer
 * computes the same function without normalization, then reset the batchnorm
 * parameters to identity (scale 1, mean 0, variance 1).
 */
void denormalize_convolutional_layer(convolutional_layer l) {
    int f, k;
    int filter_len = l.c / l.groups * l.size * l.size;
    for (f = 0; f < l.n; ++f) {
        /* .00001 guards against division by zero variance. */
        float scale = l.scales[f] / sqrt(l.rolling_variance[f] + .00001);
        for (k = 0; k < filter_len; ++k) {
            /* Keep the original left-to-right index arithmetic exactly. */
            l.weights[f * l.c / l.groups * l.size * l.size + k] *= scale;
        }
        l.biases[f] -= l.rolling_mean[f] * scale;
        l.scales[f] = 1;
        l.rolling_mean[f] = 0;
        l.rolling_variance[f] = 1;
    }
}
|
||||
|
||||
/*
|
||||
void test_convolutional_layer()
|
||||
{
|
||||
convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0);
|
||||
l.batch_normalize = 1;
|
||||
float data[] = {1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
1,1,1,1,1,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
2,2,2,2,2,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3,
|
||||
3,3,3,3,3};
|
||||
//net.input = data;
|
||||
//forward_convolutional_layer(l);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * Resize the layer for a new input resolution (w, h), reallocating every
 * buffer whose size depends on the spatial dimensions. Weights, biases and
 * channel counts are untouched.
 */
void resize_convolutional_layer(convolutional_layer *l, int w, int h) {
    l->w = w;
    l->h = h;

    l->out_w = convolutional_out_width(*l);
    l->out_h = convolutional_out_height(*l);

    l->outputs = l->out_h * l->out_w * l->out_c;
    l->inputs = l->w * l->h * l->c;

    /* NOTE(review): darknet-style realloc — the old pointer is lost if
     * realloc fails; kept as-is to preserve behavior. */
    l->output = (float *) realloc(l->output, l->batch * l->outputs * sizeof(float));
    l->delta = (float *) realloc(l->delta, l->batch * l->outputs * sizeof(float));
    if (l->batch_normalize) {
        l->x = (float *) realloc(l->x, l->batch * l->outputs * sizeof(float));
        l->x_norm = (float *) realloc(l->x_norm, l->batch * l->outputs * sizeof(float));
    }

#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);

    l->delta_gpu = cuda_make_array(l->delta, l->batch * l->outputs);
    l->output_gpu = cuda_make_array(l->output, l->batch * l->outputs);

    if (l->batch_normalize) {
        cuda_free(l->x_gpu);
        cuda_free(l->x_norm_gpu);

        l->x_gpu = cuda_make_array(l->output, l->batch * l->outputs);
        l->x_norm_gpu = cuda_make_array(l->output, l->batch * l->outputs);
    }
#ifdef CUDNN
    cudnn_convolutional_setup(l);
#endif
#endif
    l->workspace_size = get_workspace_size(*l);
}
|
||||
|
||||
/*
 * Add biases[f] to every spatial element of feature map f, for each image in
 * the batch. `output` is laid out as [batch][n][size].
 */
void add_bias(float *output, float *biases, int batch, int n, int size) {
    int img, f, s;
    for (img = 0; img < batch; ++img) {
        for (f = 0; f < n; ++f) {
            float bias = biases[f];
            float *dst = output + (img * n + f) * size;
            for (s = 0; s < size; ++s) {
                dst[s] += bias;
            }
        }
    }
}
|
||||
|
||||
/*
 * Multiply every spatial element of feature map f by scales[f], for each
 * image in the batch. `output` is laid out as [batch][n][size].
 */
void scale_bias(float *output, float *scales, int batch, int n, int size) {
    int img, f, s;
    for (img = 0; img < batch; ++img) {
        for (f = 0; f < n; ++f) {
            float sc = scales[f];
            float *dst = output + (img * n + f) * size;
            for (s = 0; s < size; ++s) {
                dst[s] *= sc;
            }
        }
    }
}
|
||||
|
||||
/*
 * Accumulate the bias gradients: bias_updates[f] += sum of the delta values
 * of feature map f, summed over the whole batch. `delta` is laid out as
 * [batch][n][size].
 */
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) {
    int img, f;
    for (img = 0; img < batch; ++img) {
        for (f = 0; f < n; ++f) {
            bias_updates[f] += sum_array(delta + size * (f + img * n), size);
        }
    }
}
|
||||
|
||||
/*
 * Forward pass on CPU: clears the output, optionally binarizes weights and
 * input (xnor mode), then for each image and group performs an im2col + GEMM
 * convolution, applies bias or batchnorm, and finally the activation.
 */
void forward_convolutional_layer(convolutional_layer l, network net) {
    fill_cpu(l.outputs * l.batch, 0, l.output, 1);

    if (l.xnor) {
        /* Swap in binarized weights/input for the duration of the pass. */
        binarize_weights(l.weights, l.n, l.c / l.groups * l.size * l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c * l.h * l.w * l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    int m = l.n / l.groups;                    /* filters per group          */
    int k = l.size * l.size * l.c / l.groups;  /* weights per filter         */
    int n = l.out_w * l.out_h;                 /* spatial output size        */
    int img, g;
    for (img = 0; img < l.batch; ++img) {
        for (g = 0; g < l.groups; ++g) {
            float *group_weights = l.weights + g * l.nweights / l.groups;
            float *colbuf = net.workspace;
            float *group_out = l.output + (img * l.groups + g) * n * m;
            float *im = net.input + (img * l.groups + g) * l.c / l.groups * l.h * l.w;

            if (l.size == 1) {
                /* 1x1 convolution: the image is already in column form. */
                colbuf = im;
            } else {
                im2col_cpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, colbuf);
            }
            gemm(0, 0, m, n, k, 1, group_weights, k, colbuf, n, 1, group_out, n);
        }
    }

    if (l.batch_normalize) {
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h * l.out_w);
    }

    activate_array(l.output, l.outputs * l.batch, l.activation);
    if (l.binary || l.xnor) swap_binary(&l);
}
|
||||
|
||||
/*
 * Backward pass on CPU: propagates the gradient through the activation,
 * accumulates bias (or batchnorm) gradients and weight gradients, and — when
 * net.delta is non-NULL — computes the gradient w.r.t. the layer input.
 *
 * Fix: the input-delta pointer `imd` was previously computed from net.delta
 * unconditionally; when net.delta is NULL (first layer of a network) that is
 * pointer arithmetic on a null pointer, which is undefined behavior in C.
 * It is now formed only inside the `if (net.delta)` branch where it is used.
 */
void backward_convolutional_layer(convolutional_layer l, network net) {
    int i, j;
    int m = l.n / l.groups;                    /* filters per group     */
    int n = l.size * l.size * l.c / l.groups;  /* weights per filter    */
    int k = l.out_w * l.out_h;                 /* spatial output size   */

    gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta);

    if (l.batch_normalize) {
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }

    for (i = 0; i < l.batch; ++i) {
        for (j = 0; j < l.groups; ++j) {
            float *a = l.delta + (i * l.groups + j) * m * k;
            float *b = net.workspace;
            float *c = l.weight_updates + j * l.nweights / l.groups;

            float *im = net.input + (i * l.groups + j) * l.c / l.groups * l.h * l.w;

            if (l.size == 1) {
                b = im;
            } else {
                im2col_cpu(im, l.c / l.groups, l.h, l.w,
                           l.size, l.stride, l.pad, b);
            }

            /* dW += dOut * col(input)^T */
            gemm(0, 1, m, n, k, 1, a, k, b, k, 1, c, n);

            if (net.delta) {
                float *imd = net.delta + (i * l.groups + j) * l.c / l.groups * l.h * l.w;
                a = l.weights + j * l.nweights / l.groups;
                b = l.delta + (i * l.groups + j) * m * k;
                c = net.workspace;
                if (l.size == 1) {
                    c = imd;
                }

                /* dIn(col) = W^T * dOut */
                gemm(1, 0, n, k, m, 1, a, n, b, k, 0, c, k);

                if (l.size != 1) {
                    col2im_cpu(net.workspace, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, imd);
                }
            }
        }
    }
}
|
||||
|
||||
/*
 * SGD-with-momentum parameter update: applies accumulated bias, scale and
 * weight gradients (averaged over the batch), with L2 weight decay on the
 * weights only, then decays the accumulators by the momentum factor.
 */
void update_convolutional_layer(convolutional_layer l, update_args a) {
    float lr = a.learning_rate * l.learning_rate_scale;
    int batch = a.batch;

    /* Biases. */
    axpy_cpu(l.n, lr / batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, a.momentum, l.bias_updates, 1);

    /* Batchnorm scales, if present. */
    if (l.scales) {
        axpy_cpu(l.n, lr / batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, a.momentum, l.scale_updates, 1);
    }

    /* Weights: decay term folded into the gradient, then the step. */
    axpy_cpu(l.nweights, -a.decay * batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.nweights, lr / batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.nweights, a.momentum, l.weight_updates, 1);
}
|
||||
|
||||
|
||||
/*
 * View filter i as an image of shape (size, size, c/groups).
 * No copy is made: the image aliases the layer's weight buffer.
 */
image get_convolutional_weight(convolutional_layer l, int i) {
    int channels = l.c / l.groups;
    float *data = l.weights + i * l.size * l.size * channels;
    return float_to_image(l.size, l.size, channels, data);
}
|
||||
|
||||
/*
 * Swap the R and B channels of every 3-channel filter in place
 * (RGB <-> BGR). Filters with any other channel count are left alone.
 */
void rgbgr_weights(convolutional_layer l) {
    int f;
    for (f = 0; f < l.n; ++f) {
        image filter = get_convolutional_weight(l, f);
        if (filter.c == 3) {
            rgbgr_image(filter);
        }
    }
}
|
||||
|
||||
/*
 * Scale every 3-channel filter by `scale` in place, and shift the matching
 * bias by (sum of the scaled filter) * trans. Filters with any other channel
 * count are left alone.
 */
void rescale_weights(convolutional_layer l, float scale, float trans) {
    int f;
    for (f = 0; f < l.n; ++f) {
        image filter = get_convolutional_weight(l, f);
        if (filter.c != 3) continue;
        scale_image(filter, scale);
        float sum = sum_array(filter.data, filter.w * filter.h * filter.c);
        l.biases[f] += sum * trans;
    }
}
|
||||
|
||||
/*
 * Return an array of l.n images, each a normalized *copy* of one filter.
 * Ownership transfers to the caller: free each image and the array itself.
 */
image *get_weights(convolutional_layer l) {
    image *weights = (image *) calloc(l.n, sizeof(image));
    int f;
    for (f = 0; f < l.n; ++f) {
        weights[f] = copy_image(get_convolutional_weight(l, f));
        normalize_image(weights[f]);
    }
    return weights;
}
|
||||
|
||||
/*
 * Display the layer's filters in `window` and return them as an image array
 * (caller owns — see get_weights). The collapsed output image is built and
 * freed but no longer shown/saved; prev_weights is currently unused.
 */
image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) {
    image *filters = get_weights(l);
    show_images(filters, l.n, window);

    image output = get_convolutional_image(l);
    image collapsed = collapse_image_layers(output, 1);
    char title[256];
    sprintf(title, "%s: Output", window);
    free_image(collapsed);
    return filters;
}
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef CONVOLUTIONAL_LAYER_H
|
||||
#define CONVOLUTIONAL_LAYER_H
|
||||
|
||||
#include "cuda.h"
|
||||
#include "image.h"
|
||||
#include "activations.h"
|
||||
#include "layer.h"
|
||||
|
@ -10,6 +9,9 @@
|
|||
typedef layer convolutional_layer;
|
||||
|
||||
#ifdef GPU
|
||||
#include "cuda.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
void forward_convolutional_layer_gpu(convolutional_layer layer, network net);
|
||||
void backward_convolutional_layer_gpu(convolutional_layer layer, network net);
|
||||
void update_convolutional_layer_gpu(convolutional_layer layer, update_args a);
|
||||
|
@ -25,25 +27,38 @@ void cudnn_convolutional_setup(layer *l);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam);
|
||||
convolutional_layer
|
||||
make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding,
|
||||
ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam);
|
||||
|
||||
void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
|
||||
|
||||
void forward_convolutional_layer(const convolutional_layer layer, network net);
|
||||
|
||||
void update_convolutional_layer(convolutional_layer layer, update_args a);
|
||||
|
||||
image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights);
|
||||
|
||||
void binarize_weights(float *weights, int n, int size, float *binary);
|
||||
|
||||
void swap_binary(convolutional_layer *l);
|
||||
|
||||
void binarize_weights2(float *weights, int n, int size, char *binary, float *scales);
|
||||
|
||||
void backward_convolutional_layer(convolutional_layer layer, network net);
|
||||
|
||||
void add_bias(float *output, float *biases, int batch, int n, int size);
|
||||
|
||||
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size);
|
||||
|
||||
image get_convolutional_image(convolutional_layer layer);
|
||||
|
||||
image get_convolutional_delta(convolutional_layer layer);
|
||||
|
||||
image get_convolutional_weight(convolutional_layer layer, int i);
|
||||
|
||||
int convolutional_out_height(convolutional_layer layer);
|
||||
|
||||
int convolutional_out_width(convolutional_layer layer);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,27 +1,28 @@
|
|||
#include "cost_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include "blas.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
COST_TYPE get_cost_type(char *s)
|
||||
{
|
||||
if (strcmp(s, "seg")==0) return SEG;
|
||||
if (strcmp(s, "sse")==0) return SSE;
|
||||
if (strcmp(s, "masked")==0) return MASKED;
|
||||
if (strcmp(s, "smooth")==0) return SMOOTH;
|
||||
if (strcmp(s, "L1")==0) return L1;
|
||||
if (strcmp(s, "wgan")==0) return WGAN;
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
COST_TYPE get_cost_type(char *s) {
|
||||
if (strcmp(s, "seg") == 0) return SEG;
|
||||
if (strcmp(s, "sse") == 0) return SSE;
|
||||
if (strcmp(s, "masked") == 0) return MASKED;
|
||||
if (strcmp(s, "smooth") == 0) return SMOOTH;
|
||||
if (strcmp(s, "L1") == 0) return L1;
|
||||
if (strcmp(s, "wgan") == 0) return WGAN;
|
||||
fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
|
||||
return SSE;
|
||||
}
|
||||
|
||||
char *get_cost_string(COST_TYPE a)
|
||||
{
|
||||
switch(a){
|
||||
char *get_cost_string(COST_TYPE a) {
|
||||
switch (a) {
|
||||
case SEG:
|
||||
return "seg";
|
||||
case SSE:
|
||||
|
@ -38,10 +39,9 @@ char *get_cost_string(COST_TYPE a)
|
|||
return "sse";
|
||||
}
|
||||
|
||||
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
|
||||
{
|
||||
fprintf(stderr, "cost %4d\n", inputs);
|
||||
cost_layer l = {0};
|
||||
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) {
|
||||
fprintf(stderr, "cost %4d\n", inputs);
|
||||
cost_layer l = {(LAYER_TYPE)0};
|
||||
l.type = COST;
|
||||
|
||||
l.scale = scale;
|
||||
|
@ -49,28 +49,27 @@ cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float sca
|
|||
l.inputs = inputs;
|
||||
l.outputs = inputs;
|
||||
l.cost_type = cost_type;
|
||||
l.delta = calloc(inputs*batch, sizeof(float));
|
||||
l.output = calloc(inputs*batch, sizeof(float));
|
||||
l.cost = calloc(1, sizeof(float));
|
||||
l.delta = (float *) calloc(inputs * batch, sizeof(float));
|
||||
l.output = (float *) calloc(inputs * batch, sizeof(float));
|
||||
l.cost = (float *) calloc(1, sizeof(float));
|
||||
|
||||
l.forward = forward_cost_layer;
|
||||
l.backward = backward_cost_layer;
|
||||
#ifdef GPU
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_cost_layer_gpu;
|
||||
l.backward_gpu = backward_cost_layer_gpu;
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.output, inputs*batch);
|
||||
l.output_gpu = cuda_make_array(l.delta, inputs*batch);
|
||||
#endif
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void resize_cost_layer(cost_layer *l, int inputs)
|
||||
{
|
||||
void resize_cost_layer(cost_layer *l, int inputs) {
|
||||
l->inputs = inputs;
|
||||
l->outputs = inputs;
|
||||
l->delta = realloc(l->delta, inputs*l->batch*sizeof(float));
|
||||
l->output = realloc(l->output, inputs*l->batch*sizeof(float));
|
||||
l->delta = (float *) realloc(l->delta, inputs * l->batch * sizeof(float));
|
||||
l->output = (float *) realloc(l->output, inputs * l->batch * sizeof(float));
|
||||
#ifdef GPU
|
||||
cuda_free(l->delta_gpu);
|
||||
cuda_free(l->output_gpu);
|
||||
|
@ -79,28 +78,26 @@ void resize_cost_layer(cost_layer *l, int inputs)
|
|||
#endif
|
||||
}
|
||||
|
||||
void forward_cost_layer(cost_layer l, network net)
|
||||
{
|
||||
void forward_cost_layer(cost_layer l, network net) {
|
||||
if (!net.truth) return;
|
||||
if(l.cost_type == MASKED){
|
||||
if (l.cost_type == MASKED) {
|
||||
int i;
|
||||
for(i = 0; i < l.batch*l.inputs; ++i){
|
||||
if(net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM;
|
||||
for (i = 0; i < l.batch * l.inputs; ++i) {
|
||||
if (net.truth[i] == SECRET_NUM) net.input[i] = SECRET_NUM;
|
||||
}
|
||||
}
|
||||
if(l.cost_type == SMOOTH){
|
||||
smooth_l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
|
||||
}else if(l.cost_type == L1){
|
||||
l1_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
|
||||
if (l.cost_type == SMOOTH) {
|
||||
smooth_l1_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output);
|
||||
} else if (l.cost_type == L1) {
|
||||
l1_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output);
|
||||
} else {
|
||||
l2_cpu(l.batch*l.inputs, net.input, net.truth, l.delta, l.output);
|
||||
l2_cpu(l.batch * l.inputs, net.input, net.truth, l.delta, l.output);
|
||||
}
|
||||
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
|
||||
l.cost[0] = sum_array(l.output, l.batch * l.inputs);
|
||||
}
|
||||
|
||||
void backward_cost_layer(const cost_layer l, network net)
|
||||
{
|
||||
axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, net.delta, 1);
|
||||
void backward_cost_layer(const cost_layer l, network net) {
|
||||
axpy_cpu(l.batch * l.inputs, l.scale, l.delta, 1, net.delta, 1);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
|
@ -1,15 +1,21 @@
|
|||
#ifndef COST_LAYER_H
|
||||
#define COST_LAYER_H
|
||||
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
typedef layer cost_layer;
|
||||
|
||||
COST_TYPE get_cost_type(char *s);
|
||||
|
||||
char *get_cost_string(COST_TYPE a);
|
||||
|
||||
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
|
||||
|
||||
void forward_cost_layer(const cost_layer l, network net);
|
||||
|
||||
void backward_cost_layer(const cost_layer l, network net);
|
||||
|
||||
void resize_cost_layer(cost_layer *l, int inputs);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#include "crnn_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
|
||||
|
@ -10,9 +9,12 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static void increment_layer(layer *l, int steps)
|
||||
{
|
||||
int num = l->outputs*l->batch*steps;
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
static void increment_layer(layer *l, int steps) {
|
||||
int num = l->outputs * l->batch * steps;
|
||||
l->output += num;
|
||||
l->delta += num;
|
||||
l->x += num;
|
||||
|
@ -26,11 +28,11 @@ static void increment_layer(layer *l, int steps)
|
|||
#endif
|
||||
}
|
||||
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
|
||||
{
|
||||
fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps,
|
||||
ACTIVATION activation, int batch_normalize) {
|
||||
fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h, w, c, output_filters);
|
||||
batch = batch / steps;
|
||||
layer l = {0};
|
||||
layer l = {(LAYER_TYPE) 0};
|
||||
l.batch = batch;
|
||||
l.type = CRNN;
|
||||
l.steps = steps;
|
||||
|
@ -40,25 +42,28 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
|||
l.out_h = h;
|
||||
l.out_w = w;
|
||||
l.out_c = output_filters;
|
||||
l.inputs = h*w*c;
|
||||
l.inputs = h * w * c;
|
||||
l.hidden = h * w * hidden_filters;
|
||||
l.outputs = l.out_h * l.out_w * l.out_c;
|
||||
|
||||
l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
|
||||
l.state = (float *) calloc(l.hidden * batch * (steps + 1), sizeof(float));
|
||||
|
||||
l.input_layer = malloc(sizeof(layer));
|
||||
l.input_layer = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
|
||||
*(l.input_layer) = make_convolutional_layer(batch * steps, h, w, c, hidden_filters, 1, 3, 1, 1, activation,
|
||||
batch_normalize, 0, 0, 0);
|
||||
l.input_layer->batch = batch;
|
||||
|
||||
l.self_layer = malloc(sizeof(layer));
|
||||
l.self_layer = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
|
||||
*(l.self_layer) = make_convolutional_layer(batch * steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1,
|
||||
activation, batch_normalize, 0, 0, 0);
|
||||
l.self_layer->batch = batch;
|
||||
|
||||
l.output_layer = malloc(sizeof(layer));
|
||||
l.output_layer = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
|
||||
*(l.output_layer) = make_convolutional_layer(batch * steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1,
|
||||
activation, batch_normalize, 0, 0, 0);
|
||||
l.output_layer->batch = batch;
|
||||
|
||||
l.output = l.output_layer->output;
|
||||
|
@ -81,15 +86,13 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
|
|||
return l;
|
||||
}
|
||||
|
||||
void update_crnn_layer(layer l, update_args a)
|
||||
{
|
||||
update_convolutional_layer(*(l.input_layer), a);
|
||||
update_convolutional_layer(*(l.self_layer), a);
|
||||
void update_crnn_layer(layer l, update_args a) {
|
||||
update_convolutional_layer(*(l.input_layer), a);
|
||||
update_convolutional_layer(*(l.self_layer), a);
|
||||
update_convolutional_layer(*(l.output_layer), a);
|
||||
}
|
||||
|
||||
void forward_crnn_layer(layer l, network net)
|
||||
{
|
||||
void forward_crnn_layer(layer l, network net) {
|
||||
network s = net;
|
||||
s.train = net.train;
|
||||
int i;
|
||||
|
@ -100,7 +103,7 @@ void forward_crnn_layer(layer l, network net)
|
|||
fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
|
||||
fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
|
||||
fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
|
||||
if(net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
|
||||
if (net.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
|
||||
|
||||
for (i = 0; i < l.steps; ++i) {
|
||||
s.input = net.input;
|
||||
|
@ -110,10 +113,10 @@ void forward_crnn_layer(layer l, network net)
|
|||
forward_convolutional_layer(self_layer, s);
|
||||
|
||||
float *old_state = l.state;
|
||||
if(net.train) l.state += l.hidden*l.batch;
|
||||
if(l.shortcut){
|
||||
if (net.train) l.state += l.hidden * l.batch;
|
||||
if (l.shortcut) {
|
||||
copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
|
||||
}else{
|
||||
} else {
|
||||
fill_cpu(l.hidden * l.batch, 0, l.state, 1);
|
||||
}
|
||||
axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1);
|
||||
|
@ -122,27 +125,26 @@ void forward_crnn_layer(layer l, network net)
|
|||
s.input = l.state;
|
||||
forward_convolutional_layer(output_layer, s);
|
||||
|
||||
net.input += l.inputs*l.batch;
|
||||
net.input += l.inputs * l.batch;
|
||||
increment_layer(&input_layer, 1);
|
||||
increment_layer(&self_layer, 1);
|
||||
increment_layer(&output_layer, 1);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_crnn_layer(layer l, network net)
|
||||
{
|
||||
void backward_crnn_layer(layer l, network net) {
|
||||
network s = net;
|
||||
int i;
|
||||
layer input_layer = *(l.input_layer);
|
||||
layer self_layer = *(l.self_layer);
|
||||
layer output_layer = *(l.output_layer);
|
||||
|
||||
increment_layer(&input_layer, l.steps-1);
|
||||
increment_layer(&self_layer, l.steps-1);
|
||||
increment_layer(&output_layer, l.steps-1);
|
||||
increment_layer(&input_layer, l.steps - 1);
|
||||
increment_layer(&self_layer, l.steps - 1);
|
||||
increment_layer(&output_layer, l.steps - 1);
|
||||
|
||||
l.state += l.hidden*l.batch*l.steps;
|
||||
for (i = l.steps-1; i >= 0; --i) {
|
||||
l.state += l.hidden * l.batch * l.steps;
|
||||
for (i = l.steps - 1; i >= 0; --i) {
|
||||
copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
|
||||
axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
|
||||
|
||||
|
@ -150,7 +152,7 @@ void backward_crnn_layer(layer l, network net)
|
|||
s.delta = self_layer.delta;
|
||||
backward_convolutional_layer(output_layer, s);
|
||||
|
||||
l.state -= l.hidden*l.batch;
|
||||
l.state -= l.hidden * l.batch;
|
||||
/*
|
||||
if(i > 0){
|
||||
copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1);
|
||||
|
@ -161,14 +163,15 @@ void backward_crnn_layer(layer l, network net)
|
|||
*/
|
||||
|
||||
s.input = l.state;
|
||||
s.delta = self_layer.delta - l.hidden*l.batch;
|
||||
s.delta = self_layer.delta - l.hidden * l.batch;
|
||||
if (i == 0) s.delta = 0;
|
||||
backward_convolutional_layer(self_layer, s);
|
||||
|
||||
copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
|
||||
if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1);
|
||||
s.input = net.input + i*l.inputs*l.batch;
|
||||
if(net.delta) s.delta = net.delta + i*l.inputs*l.batch;
|
||||
copy_cpu(l.hidden * l.batch, self_layer.delta, 1, input_layer.delta, 1);
|
||||
if (i > 0 && l.shortcut)
|
||||
axpy_cpu(l.hidden * l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden * l.batch, 1);
|
||||
s.input = net.input + i * l.inputs * l.batch;
|
||||
if (net.delta) s.delta = net.delta + i * l.inputs * l.batch;
|
||||
else s.delta = 0;
|
||||
backward_convolutional_layer(input_layer, s);
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
#ifndef CRNN_LAYER_H
|
||||
#define CRNN_LAYER_H
|
||||
|
||||
|
@ -6,10 +5,13 @@
|
|||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
|
||||
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps,
|
||||
ACTIVATION activation, int batch_normalize);
|
||||
|
||||
void forward_crnn_layer(layer l, network net);
|
||||
|
||||
void backward_crnn_layer(layer l, network net);
|
||||
|
||||
void update_crnn_layer(layer l, update_args a);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
103
src/crop_layer.c
103
src/crop_layer.c
|
@ -1,103 +0,0 @@
|
|||
#include "crop_layer.h"
|
||||
#include "cuda.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * View the crop layer's output buffer as an image of shape
 * (out_w, out_h, out_c). No copy is made.
 */
image get_crop_image(crop_layer l) {
    return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
}
|
||||
|
||||
void backward_crop_layer(const crop_layer l, network net){}
|
||||
void backward_crop_layer_gpu(const crop_layer l, network net){}
|
||||
|
||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
|
||||
{
|
||||
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
|
||||
crop_layer l = {0};
|
||||
l.type = CROP;
|
||||
l.batch = batch;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.scale = (float)crop_height / h;
|
||||
l.flip = flip;
|
||||
l.angle = angle;
|
||||
l.saturation = saturation;
|
||||
l.exposure = exposure;
|
||||
l.out_w = crop_width;
|
||||
l.out_h = crop_height;
|
||||
l.out_c = c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||
l.output = calloc(l.outputs*batch, sizeof(float));
|
||||
l.forward = forward_crop_layer;
|
||||
l.backward = backward_crop_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_crop_layer_gpu;
|
||||
l.backward_gpu = backward_crop_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
|
||||
l.rand_gpu = cuda_make_array(0, l.batch*8);
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void resize_crop_layer(layer *l, int w, int h)
|
||||
{
|
||||
l->w = w;
|
||||
l->h = h;
|
||||
|
||||
l->out_w = l->scale*w;
|
||||
l->out_h = l->scale*h;
|
||||
|
||||
l->inputs = l->w * l->h * l->c;
|
||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||
|
||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
||||
#ifdef GPU
|
||||
cuda_free(l->output_gpu);
|
||||
l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void forward_crop_layer(const crop_layer l, network net)
|
||||
{
|
||||
int i,j,c,b,row,col;
|
||||
int index;
|
||||
int count = 0;
|
||||
int flip = (l.flip && rand()%2);
|
||||
int dh = rand()%(l.h - l.out_h + 1);
|
||||
int dw = rand()%(l.w - l.out_w + 1);
|
||||
float scale = 2;
|
||||
float trans = -1;
|
||||
if(l.noadjust){
|
||||
scale = 1;
|
||||
trans = 0;
|
||||
}
|
||||
if(!net.train){
|
||||
flip = 0;
|
||||
dh = (l.h - l.out_h)/2;
|
||||
dw = (l.w - l.out_w)/2;
|
||||
}
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
for(c = 0; c < l.c; ++c){
|
||||
for(i = 0; i < l.out_h; ++i){
|
||||
for(j = 0; j < l.out_w; ++j){
|
||||
if(flip){
|
||||
col = l.w - dw - j - 1;
|
||||
}else{
|
||||
col = j + dw;
|
||||
}
|
||||
row = i + dh;
|
||||
index = col+l.w*(row+l.h*(c + l.c*b));
|
||||
l.output[count++] = net.input[index]*scale + trans;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
#include "crop_layer.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
image get_crop_image(crop_layer l) {
|
||||
int h = l.out_h;
|
||||
int w = l.out_w;
|
||||
int c = l.out_c;
|
||||
return float_to_image(w, h, c, l.output);
|
||||
}
|
||||
|
||||
void backward_crop_layer(const crop_layer l, network net) {}
|
||||
|
||||
void backward_crop_layer_gpu(const crop_layer l, network net) {}
|
||||
|
||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle,
|
||||
float saturation, float exposure) {
|
||||
fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h, w, crop_height, crop_width, c);
|
||||
crop_layer l = {(LAYER_TYPE)0};
|
||||
l.type = CROP;
|
||||
l.batch = batch;
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.scale = (float) crop_height / h;
|
||||
l.flip = flip;
|
||||
l.angle = angle;
|
||||
l.saturation = saturation;
|
||||
l.exposure = exposure;
|
||||
l.out_w = crop_width;
|
||||
l.out_h = crop_height;
|
||||
l.out_c = c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||
l.output = (float*)calloc(l.outputs * batch, sizeof(float));
|
||||
l.forward = forward_crop_layer;
|
||||
l.backward = backward_crop_layer;
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_crop_layer_gpu;
|
||||
l.backward_gpu = backward_crop_layer_gpu;
|
||||
l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
|
||||
l.rand_gpu = cuda_make_array(0, l.batch*8);
|
||||
#endif
|
||||
return l;
|
||||
}
|
||||
|
||||
void resize_crop_layer(layer *l, int w, int h) {
|
||||
l->w = w;
|
||||
l->h = h;
|
||||
|
||||
l->out_w = l->scale * w;
|
||||
l->out_h = l->scale * h;
|
||||
|
||||
l->inputs = l->w * l->h * l->c;
|
||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||
|
||||
l->output = (float *) realloc(l->output, l->batch * l->outputs * sizeof(float));
|
||||
#ifdef GPU
|
||||
cuda_free(l->output_gpu);
|
||||
l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void forward_crop_layer(const crop_layer l, network net) {
|
||||
int i, j, c, b, row, col;
|
||||
int index;
|
||||
int count = 0;
|
||||
int flip = (l.flip && rand() % 2);
|
||||
int dh = rand() % (l.h - l.out_h + 1);
|
||||
int dw = rand() % (l.w - l.out_w + 1);
|
||||
float scale = 2;
|
||||
float trans = -1;
|
||||
if (l.noadjust) {
|
||||
scale = 1;
|
||||
trans = 0;
|
||||
}
|
||||
if (!net.train) {
|
||||
flip = 0;
|
||||
dh = (l.h - l.out_h) / 2;
|
||||
dw = (l.w - l.out_w) / 2;
|
||||
}
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
for (c = 0; c < l.c; ++c) {
|
||||
for (i = 0; i < l.out_h; ++i) {
|
||||
for (j = 0; j < l.out_w; ++j) {
|
||||
if (flip) {
|
||||
col = l.w - dw - j - 1;
|
||||
} else {
|
||||
col = j + dw;
|
||||
}
|
||||
row = i + dh;
|
||||
index = col + l.w * (row + l.h * (c + l.c * b));
|
||||
l.output[count++] = net.input[index] * scale + trans;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -8,8 +8,12 @@
|
|||
typedef layer crop_layer;
|
||||
|
||||
image get_crop_image(crop_layer l);
|
||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure);
|
||||
|
||||
crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle,
|
||||
float saturation, float exposure);
|
||||
|
||||
void forward_crop_layer(const crop_layer l, network net);
|
||||
|
||||
void resize_crop_layer(layer *l, int w, int h);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -1,105 +1,122 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
extern "C" {
|
||||
#include "crop_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include "image.h"
|
||||
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) {
|
||||
if (x < 0 || x >= w || y < 0 || y >= h) return 0;
|
||||
return image[x + w * (y + c * h)];
|
||||
}
|
||||
|
||||
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
|
||||
{
|
||||
if(x < 0 || x >= w || y < 0 || y >= h) return 0;
|
||||
return image[x + w*(y + c*h)];
|
||||
__device__ float3
|
||||
rgb_to_hsv_kernel(float3
|
||||
rgb) {
|
||||
float r = rgb.x;
|
||||
float g = rgb.y;
|
||||
float b = rgb.z;
|
||||
|
||||
float h, s, v;
|
||||
float max = (r > g) ? ((r > b) ? r : b) : ((g > b) ? g : b);
|
||||
float min = (r < g) ? ((r < b) ? r : b) : ((g < b) ? g : b);
|
||||
float delta = max - min;
|
||||
v = max;
|
||||
if (max == 0) {
|
||||
s = 0;
|
||||
h = -1;
|
||||
} else {
|
||||
s = delta / max;
|
||||
if (r == max) {
|
||||
h = (g - b) / delta;
|
||||
} else if (g == max) {
|
||||
h = 2 + (b - r) / delta;
|
||||
} else {
|
||||
h = 4 + (r - g) / delta;
|
||||
}
|
||||
if (h < 0) h += 6;
|
||||
}
|
||||
return
|
||||
make_float3(h, s, v
|
||||
);
|
||||
}
|
||||
|
||||
__device__ float3 rgb_to_hsv_kernel(float3 rgb)
|
||||
{
|
||||
float r = rgb.x;
|
||||
float g = rgb.y;
|
||||
float b = rgb.z;
|
||||
__device__ float3
|
||||
hsv_to_rgb_kernel(float3
|
||||
hsv) {
|
||||
float h = hsv.x;
|
||||
float s = hsv.y;
|
||||
float v = hsv.z;
|
||||
|
||||
float h, s, v;
|
||||
float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b);
|
||||
float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b);
|
||||
float delta = max - min;
|
||||
v = max;
|
||||
if(max == 0){
|
||||
s = 0;
|
||||
h = -1;
|
||||
}else{
|
||||
s = delta/max;
|
||||
if(r == max){
|
||||
h = (g - b) / delta;
|
||||
} else if (g == max) {
|
||||
h = 2 + (b - r) / delta;
|
||||
} else {
|
||||
h = 4 + (r - g) / delta;
|
||||
}
|
||||
if (h < 0) h += 6;
|
||||
}
|
||||
return make_float3(h, s, v);
|
||||
float r, g, b;
|
||||
float f, p, q, t;
|
||||
|
||||
if (s == 0) {
|
||||
r = g = b = v;
|
||||
} else {
|
||||
int index = (int) floorf(h);
|
||||
f = h - index;
|
||||
p = v * (1 - s);
|
||||
q = v * (1 - s * f);
|
||||
t = v * (1 - s * (1 - f));
|
||||
if (index == 0) {
|
||||
r = v;
|
||||
g = t;
|
||||
b = p;
|
||||
} else if (index == 1) {
|
||||
r = q;
|
||||
g = v;
|
||||
b = p;
|
||||
} else if (index == 2) {
|
||||
r = p;
|
||||
g = v;
|
||||
b = t;
|
||||
} else if (index == 3) {
|
||||
r = p;
|
||||
g = q;
|
||||
b = v;
|
||||
} else if (index == 4) {
|
||||
r = t;
|
||||
g = p;
|
||||
b = v;
|
||||
} else {
|
||||
r = v;
|
||||
g = p;
|
||||
b = q;
|
||||
}
|
||||
}
|
||||
r = (r < 0) ? 0 : ((r > 1) ? 1 : r);
|
||||
g = (g < 0) ? 0 : ((g > 1) ? 1 : g);
|
||||
b = (b < 0) ? 0 : ((b > 1) ? 1 : b);
|
||||
return
|
||||
make_float3(r, g, b
|
||||
);
|
||||
}
|
||||
|
||||
__device__ float3 hsv_to_rgb_kernel(float3 hsv)
|
||||
{
|
||||
float h = hsv.x;
|
||||
float s = hsv.y;
|
||||
float v = hsv.z;
|
||||
|
||||
float r, g, b;
|
||||
float f, p, q, t;
|
||||
|
||||
if (s == 0) {
|
||||
r = g = b = v;
|
||||
} else {
|
||||
int index = (int) floorf(h);
|
||||
f = h - index;
|
||||
p = v*(1-s);
|
||||
q = v*(1-s*f);
|
||||
t = v*(1-s*(1-f));
|
||||
if(index == 0){
|
||||
r = v; g = t; b = p;
|
||||
} else if(index == 1){
|
||||
r = q; g = v; b = p;
|
||||
} else if(index == 2){
|
||||
r = p; g = v; b = t;
|
||||
} else if(index == 3){
|
||||
r = p; g = q; b = v;
|
||||
} else if(index == 4){
|
||||
r = t; g = p; b = v;
|
||||
} else {
|
||||
r = v; g = p; b = q;
|
||||
}
|
||||
}
|
||||
r = (r < 0) ? 0 : ((r > 1) ? 1 : r);
|
||||
g = (g < 0) ? 0 : ((g > 1) ? 1 : g);
|
||||
b = (b < 0) ? 0 : ((b > 1) ? 1 : b);
|
||||
return make_float3(r, g, b);
|
||||
}
|
||||
|
||||
__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
|
||||
{
|
||||
__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) {
|
||||
int ix = (int) floorf(x);
|
||||
int iy = (int) floorf(y);
|
||||
|
||||
float dx = x - ix;
|
||||
float dy = y - iy;
|
||||
|
||||
float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) +
|
||||
dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) +
|
||||
(1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) +
|
||||
dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c);
|
||||
float val = (1 - dy) * (1 - dx) * get_pixel_kernel(image, w, h, ix, iy, c) +
|
||||
dy * (1 - dx) * get_pixel_kernel(image, w, h, ix, iy + 1, c) +
|
||||
(1 - dy) * dx * get_pixel_kernel(image, w, h, ix + 1, iy, c) +
|
||||
dy * dx * get_pixel_kernel(image, w, h, ix + 1, iy + 1, c);
|
||||
return val;
|
||||
}
|
||||
|
||||
__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift)
|
||||
{
|
||||
__global__ void
|
||||
levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure,
|
||||
float translate, float scale, float shift) {
|
||||
int size = batch * w * h;
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id >= size) return;
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id >= size) return;
|
||||
int x = id % w;
|
||||
id /= w;
|
||||
int y = id % h;
|
||||
|
@ -107,23 +124,23 @@ __global__ void levels_image_kernel(float *image, float *rand, int batch, int w,
|
|||
float rshift = rand[0];
|
||||
float gshift = rand[1];
|
||||
float bshift = rand[2];
|
||||
float r0 = rand[8*id + 0];
|
||||
float r1 = rand[8*id + 1];
|
||||
float r2 = rand[8*id + 2];
|
||||
float r3 = rand[8*id + 3];
|
||||
float r0 = rand[8 * id + 0];
|
||||
float r1 = rand[8 * id + 1];
|
||||
float r2 = rand[8 * id + 2];
|
||||
float r3 = rand[8 * id + 3];
|
||||
|
||||
saturation = r0*(saturation - 1) + 1;
|
||||
saturation = (r1 > .5f) ? 1.f/saturation : saturation;
|
||||
exposure = r2*(exposure - 1) + 1;
|
||||
exposure = (r3 > .5f) ? 1.f/exposure : exposure;
|
||||
saturation = r0 * (saturation - 1) + 1;
|
||||
saturation = (r1 > .5f) ? 1.f / saturation : saturation;
|
||||
exposure = r2 * (exposure - 1) + 1;
|
||||
exposure = (r3 > .5f) ? 1.f / exposure : exposure;
|
||||
|
||||
size_t offset = id * h * w * 3;
|
||||
image += offset;
|
||||
float r = image[x + w*(y + h*0)];
|
||||
float g = image[x + w*(y + h*1)];
|
||||
float b = image[x + w*(y + h*2)];
|
||||
float3 rgb = make_float3(r,g,b);
|
||||
if(train){
|
||||
float r = image[x + w * (y + h * 0)];
|
||||
float g = image[x + w * (y + h * 1)];
|
||||
float b = image[x + w * (y + h * 2)];
|
||||
float3 rgb = make_float3(r, g, b);
|
||||
if (train) {
|
||||
float3 hsv = rgb_to_hsv_kernel(rgb);
|
||||
hsv.y *= saturation;
|
||||
hsv.z *= exposure;
|
||||
|
@ -131,18 +148,19 @@ __global__ void levels_image_kernel(float *image, float *rand, int batch, int w,
|
|||
} else {
|
||||
shift = 0;
|
||||
}
|
||||
image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5f)*shift;
|
||||
image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5f)*shift;
|
||||
image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5f)*shift;
|
||||
image[x + w * (y + h * 0)] = rgb.x * scale + translate + (rshift - .5f) * shift;
|
||||
image[x + w * (y + h * 1)] = rgb.y * scale + translate + (gshift - .5f) * shift;
|
||||
image[x + w * (y + h * 2)] = rgb.z * scale + translate + (bshift - .5f) * shift;
|
||||
}
|
||||
|
||||
__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id >= size) return;
|
||||
__global__ void
|
||||
forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width,
|
||||
int train, int flip, float angle, float *output) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id >= size) return;
|
||||
|
||||
float cx = w/2.f;
|
||||
float cy = h/2.f;
|
||||
float cx = w / 2.f;
|
||||
float cy = h / 2.f;
|
||||
|
||||
int count = id;
|
||||
int j = id % crop_width;
|
||||
|
@ -153,55 +171,58 @@ __global__ void forward_crop_layer_kernel(float *input, float *rand, int size, i
|
|||
id /= c;
|
||||
int b = id;
|
||||
|
||||
float r4 = rand[8*b + 4];
|
||||
float r5 = rand[8*b + 5];
|
||||
float r6 = rand[8*b + 6];
|
||||
float r7 = rand[8*b + 7];
|
||||
float r4 = rand[8 * b + 4];
|
||||
float r5 = rand[8 * b + 5];
|
||||
float r6 = rand[8 * b + 6];
|
||||
float r7 = rand[8 * b + 7];
|
||||
|
||||
float dw = (w - crop_width)*r4;
|
||||
float dh = (h - crop_height)*r5;
|
||||
float dw = (w - crop_width) * r4;
|
||||
float dh = (h - crop_height) * r5;
|
||||
flip = (flip && (r6 > .5f));
|
||||
angle = 2*angle*r7 - angle;
|
||||
if(!train){
|
||||
dw = (w - crop_width)/2.f;
|
||||
dh = (h - crop_height)/2.f;
|
||||
angle = 2 * angle * r7 - angle;
|
||||
if (!train) {
|
||||
dw = (w - crop_width) / 2.f;
|
||||
dh = (h - crop_height) / 2.f;
|
||||
flip = 0;
|
||||
angle = 0;
|
||||
}
|
||||
|
||||
input += w*h*c*b;
|
||||
input += w * h * c * b;
|
||||
|
||||
float x = (flip) ? w - dw - j - 1 : j + dw;
|
||||
float x = (flip) ? w - dw - j - 1 : j + dw;
|
||||
float y = i + dh;
|
||||
|
||||
float rx = cosf(angle)*(x-cx) - sinf(angle)*(y-cy) + cx;
|
||||
float ry = sinf(angle)*(x-cx) + cosf(angle)*(y-cy) + cy;
|
||||
float rx = cosf(angle) * (x - cx) - sinf(angle) * (y - cy) + cx;
|
||||
float ry = sinf(angle) * (x - cx) + cosf(angle) * (y - cy) + cy;
|
||||
|
||||
output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
|
||||
}
|
||||
|
||||
extern "C" void forward_crop_layer_gpu(crop_layer layer, network net)
|
||||
{
|
||||
cuda_random(layer.rand_gpu, layer.batch*8);
|
||||
void forward_crop_layer_gpu(crop_layer layer, network net) {
|
||||
cuda_random(layer.rand_gpu, layer.batch * 8);
|
||||
|
||||
float radians = layer.angle*3.14159265f/180.f;
|
||||
float radians = layer.angle * 3.14159265f / 180.f;
|
||||
|
||||
float scale = 2;
|
||||
float translate = -1;
|
||||
if(layer.noadjust){
|
||||
if (layer.noadjust) {
|
||||
scale = 1;
|
||||
translate = 0;
|
||||
}
|
||||
|
||||
int size = layer.batch * layer.w * layer.h;
|
||||
|
||||
levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h, net.train, layer.saturation, layer.exposure, translate, scale, layer.shift);
|
||||
check_error(cudaPeekAtLastError());
|
||||
levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, layer.batch, layer.w, layer.h,
|
||||
net.train, layer.saturation, layer.exposure, translate, scale,
|
||||
layer.shift);
|
||||
check_error(hipPeekAtLastError());
|
||||
|
||||
size = layer.batch*layer.c*layer.out_w*layer.out_h;
|
||||
size = layer.batch * layer.c * layer.out_w * layer.out_h;
|
||||
|
||||
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, net.train, layer.flip, radians, layer.output_gpu);
|
||||
check_error(cudaPeekAtLastError());
|
||||
forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, layer.rand_gpu, size, layer.c, layer.h,
|
||||
layer.w, layer.out_h, layer.out_w, net.train, layer.flip,
|
||||
radians, layer.output_gpu);
|
||||
check_error(hipPeekAtLastError());
|
||||
|
||||
/*
|
||||
cuda_pull_array(layer.output_gpu, layer.output, size);
|
||||
|
|
178
src/cuda.c
178
src/cuda.c
|
@ -1,178 +0,0 @@
|
|||
int gpu_index = 0;
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include "cuda.h"
|
||||
#include "utils.h"
|
||||
#include "blas.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
void cuda_set_device(int n)
|
||||
{
|
||||
gpu_index = n;
|
||||
cudaError_t status = cudaSetDevice(n);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
int cuda_get_device()
|
||||
{
|
||||
int n = 0;
|
||||
cudaError_t status = cudaGetDevice(&n);
|
||||
check_error(status);
|
||||
return n;
|
||||
}
|
||||
|
||||
void check_error(cudaError_t status)
|
||||
{
|
||||
//cudaDeviceSynchronize();
|
||||
cudaError_t status2 = cudaGetLastError();
|
||||
if (status != cudaSuccess)
|
||||
{
|
||||
const char *s = cudaGetErrorString(status);
|
||||
char buffer[256];
|
||||
printf("CUDA Error: %s\n", s);
|
||||
assert(0);
|
||||
snprintf(buffer, 256, "CUDA Error: %s", s);
|
||||
error(buffer);
|
||||
}
|
||||
if (status2 != cudaSuccess)
|
||||
{
|
||||
const char *s = cudaGetErrorString(status);
|
||||
char buffer[256];
|
||||
printf("CUDA Error Prev: %s\n", s);
|
||||
assert(0);
|
||||
snprintf(buffer, 256, "CUDA Error Prev: %s", s);
|
||||
error(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
dim3 cuda_gridsize(size_t n){
|
||||
size_t k = (n-1) / BLOCK + 1;
|
||||
size_t x = k;
|
||||
size_t y = 1;
|
||||
if(x > 65535){
|
||||
x = ceil(sqrt(k));
|
||||
y = (n-1)/(x*BLOCK) + 1;
|
||||
}
|
||||
dim3 d = {x, y, 1};
|
||||
//printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
|
||||
return d;
|
||||
}
|
||||
|
||||
#ifdef CUDNN
|
||||
cudnnHandle_t cudnn_handle()
|
||||
{
|
||||
static int init[16] = {0};
|
||||
static cudnnHandle_t handle[16];
|
||||
int i = cuda_get_device();
|
||||
if(!init[i]) {
|
||||
cudnnCreate(&handle[i]);
|
||||
init[i] = 1;
|
||||
}
|
||||
return handle[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
cublasHandle_t blas_handle()
|
||||
{
|
||||
static int init[16] = {0};
|
||||
static cublasHandle_t handle[16];
|
||||
int i = cuda_get_device();
|
||||
if(!init[i]) {
|
||||
cublasCreate(&handle[i]);
|
||||
init[i] = 1;
|
||||
}
|
||||
return handle[i];
|
||||
}
|
||||
|
||||
float *cuda_make_array(float *x, size_t n)
|
||||
{
|
||||
float *x_gpu;
|
||||
size_t size = sizeof(float)*n;
|
||||
cudaError_t status = cudaMalloc((void **)&x_gpu, size);
|
||||
check_error(status);
|
||||
if(x){
|
||||
status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
|
||||
check_error(status);
|
||||
} else {
|
||||
fill_gpu(n, 0, x_gpu, 1);
|
||||
}
|
||||
if(!x_gpu) error("Cuda malloc failed\n");
|
||||
return x_gpu;
|
||||
}
|
||||
|
||||
void cuda_random(float *x_gpu, size_t n)
|
||||
{
|
||||
static curandGenerator_t gen[16];
|
||||
static int init[16] = {0};
|
||||
int i = cuda_get_device();
|
||||
if(!init[i]){
|
||||
curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT);
|
||||
curandSetPseudoRandomGeneratorSeed(gen[i], time(0));
|
||||
init[i] = 1;
|
||||
}
|
||||
curandGenerateUniform(gen[i], x_gpu, n);
|
||||
check_error(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s)
|
||||
{
|
||||
float *tmp = calloc(n, sizeof(float));
|
||||
cuda_pull_array(x_gpu, tmp, n);
|
||||
//int i;
|
||||
//for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
|
||||
axpy_cpu(n, -1, x, 1, tmp, 1);
|
||||
float err = dot_cpu(n, tmp, 1, tmp, 1);
|
||||
printf("Error %s: %f\n", s, sqrt(err/n));
|
||||
free(tmp);
|
||||
return err;
|
||||
}
|
||||
|
||||
int *cuda_make_int_array(int *x, size_t n)
|
||||
{
|
||||
int *x_gpu;
|
||||
size_t size = sizeof(int)*n;
|
||||
cudaError_t status = cudaMalloc((void **)&x_gpu, size);
|
||||
check_error(status);
|
||||
if(x){
|
||||
status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
|
||||
check_error(status);
|
||||
}
|
||||
if(!x_gpu) error("Cuda malloc failed\n");
|
||||
return x_gpu;
|
||||
}
|
||||
|
||||
void cuda_free(float *x_gpu)
|
||||
{
|
||||
cudaError_t status = cudaFree(x_gpu);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
void cuda_push_array(float *x_gpu, float *x, size_t n)
|
||||
{
|
||||
size_t size = sizeof(float)*n;
|
||||
cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
void cuda_pull_array(float *x_gpu, float *x, size_t n)
|
||||
{
|
||||
size_t size = sizeof(float)*n;
|
||||
cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
float cuda_mag_array(float *x_gpu, size_t n)
|
||||
{
|
||||
float *temp = calloc(n, sizeof(float));
|
||||
cuda_pull_array(x_gpu, temp, n);
|
||||
float m = mag_array(temp, n);
|
||||
free(temp);
|
||||
return m;
|
||||
}
|
||||
#else
|
||||
void cuda_set_device(int n){}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,166 @@
|
|||
int gpu_index = 0;
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "utils.h"
|
||||
#include "blas.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
void cuda_set_device(int n) {
|
||||
gpu_index = n;
|
||||
hipError_t status = hipSetDevice(n);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
int cuda_get_device() {
|
||||
int n = 0;
|
||||
hipError_t status = hipGetDevice(&n);
|
||||
check_error(status);
|
||||
return n;
|
||||
}
|
||||
|
||||
void check_error(hipError_t status) {
|
||||
//hipDeviceSynchronize();
|
||||
hipError_t status2 = hipGetLastError();
|
||||
if (status != hipSuccess) {
|
||||
const char *s = hipGetErrorString(status);
|
||||
char buffer[256];
|
||||
printf("CUDA Error: %s\n", s);
|
||||
assert(0);
|
||||
snprintf(buffer, 256, "CUDA Error: %s", s);
|
||||
error(buffer);
|
||||
}
|
||||
if (status2 != hipSuccess) {
|
||||
const char *s = hipGetErrorString(status);
|
||||
char buffer[256];
|
||||
printf("CUDA Error Prev: %s\n", s);
|
||||
assert(0);
|
||||
snprintf(buffer, 256, "CUDA Error Prev: %s", s);
|
||||
error(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
dim3 cuda_gridsize(size_t n) {
|
||||
size_t k = (n - 1) / BLOCK + 1;
|
||||
size_t x = k;
|
||||
size_t y = 1;
|
||||
if (x > 65535) {
|
||||
x = ceil(sqrt(k));
|
||||
y = (n - 1) / (x * BLOCK) + 1;
|
||||
}
|
||||
dim3 d = {(uint32_t)x, (uint32_t)y, 1};
|
||||
//printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK);
|
||||
return d;
|
||||
}
|
||||
|
||||
#ifdef CUDNN
|
||||
hipdnnHandle_t cudnn_handle()
|
||||
{
|
||||
static int init[16] = {0};
|
||||
static hipdnnHandle_t handle[16];
|
||||
int i = cuda_get_device();
|
||||
if(!init[i]) {
|
||||
hipdnnCreate(&handle[i]);
|
||||
init[i] = 1;
|
||||
}
|
||||
return handle[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
hipblasHandle_t blas_handle() {
|
||||
static int init[16] = {0};
|
||||
static hipblasHandle_t handle[16];
|
||||
int i = cuda_get_device();
|
||||
if (!init[i]) {
|
||||
hipblasCreate(&handle[i]);
|
||||
init[i] = 1;
|
||||
}
|
||||
return handle[i];
|
||||
}
|
||||
|
||||
float *cuda_make_array(float *x, size_t n) {
|
||||
float *x_gpu;
|
||||
size_t size = sizeof(float) * n;
|
||||
hipError_t status = hipMalloc((void **) &x_gpu, size);
|
||||
check_error(status);
|
||||
if (x) {
|
||||
status = hipMemcpy(x_gpu, x, size, hipMemcpyHostToDevice);
|
||||
check_error(status);
|
||||
} else {
|
||||
fill_gpu(n, 0, x_gpu, 1);
|
||||
}
|
||||
if (!x_gpu) error("Cuda malloc failed\n");
|
||||
return x_gpu;
|
||||
}
|
||||
|
||||
void cuda_random(float *x_gpu, size_t n) {
|
||||
static hiprandGenerator_t gen[16];
|
||||
static int init[16] = {0};
|
||||
int i = cuda_get_device();
|
||||
if (!init[i]) {
|
||||
hiprandCreateGenerator(&gen[i], HIPRAND_RNG_PSEUDO_DEFAULT);
|
||||
hiprandSetPseudoRandomGeneratorSeed(gen[i], time(0));
|
||||
init[i] = 1;
|
||||
}
|
||||
hiprandGenerateUniform(gen[i], x_gpu, n);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s) {
|
||||
float *tmp = (float*)calloc(n, sizeof(float));
|
||||
cuda_pull_array(x_gpu, tmp, n);
|
||||
//int i;
|
||||
//for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
|
||||
axpy_cpu(n, -1, x, 1, tmp, 1);
|
||||
float err = dot_cpu(n, tmp, 1, tmp, 1);
|
||||
printf("Error %s: %f\n", s, sqrt(err / n));
|
||||
free(tmp);
|
||||
return err;
|
||||
}
|
||||
|
||||
int *cuda_make_int_array(int *x, size_t n) {
|
||||
int *x_gpu;
|
||||
size_t size = sizeof(int) * n;
|
||||
hipError_t status = hipMalloc((void **) &x_gpu, size);
|
||||
check_error(status);
|
||||
if (x) {
|
||||
status = hipMemcpy(x_gpu, x, size, hipMemcpyHostToDevice);
|
||||
check_error(status);
|
||||
}
|
||||
if (!x_gpu) error("Cuda malloc failed\n");
|
||||
return x_gpu;
|
||||
}
|
||||
|
||||
void cuda_free(float *x_gpu) {
|
||||
hipError_t status = hipFree(x_gpu);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
void cuda_push_array(float *x_gpu, float *x, size_t n) {
|
||||
size_t size = sizeof(float) * n;
|
||||
hipError_t status = hipMemcpy(x_gpu, x, size, hipMemcpyHostToDevice);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
void cuda_pull_array(float *x_gpu, float *x, size_t n) {
|
||||
size_t size = sizeof(float) * n;
|
||||
hipError_t status = hipMemcpy(x, x_gpu, size, hipMemcpyDeviceToHost);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
float cuda_mag_array(float *x_gpu, size_t n) {
|
||||
float *temp = (float*)calloc(n, sizeof(float));
|
||||
cuda_pull_array(x_gpu, temp, n);
|
||||
float m = mag_array(temp, n);
|
||||
free(temp);
|
||||
return m;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void cuda_set_device(int n) {}
|
||||
|
||||
#endif
|
|
@ -5,16 +5,17 @@
|
|||
|
||||
#ifdef GPU
|
||||
|
||||
void check_error(cudaError_t status);
|
||||
cublasHandle_t blas_handle();
|
||||
void check_error(hipError_t status);
|
||||
hipblasHandle_t blas_handle();
|
||||
int *cuda_make_int_array(int *x, size_t n);
|
||||
void cuda_random(float *x_gpu, size_t n);
|
||||
float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
|
||||
dim3 cuda_gridsize(size_t n);
|
||||
|
||||
#ifdef CUDNN
|
||||
cudnnHandle_t cudnn_handle();
|
||||
hipdnnHandle_t cudnn_handle();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
46
src/data.h
46
src/data.h
|
@ -1,5 +1,6 @@
|
|||
#ifndef DATA_H
|
||||
#define DATA_H
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#include "darknet.h"
|
||||
|
@ -8,43 +9,68 @@
|
|||
#include "image.h"
|
||||
#include "tree.h"
|
||||
|
||||
static inline float distance_from_edge(int x, int max)
|
||||
{
|
||||
int dx = (max/2) - x;
|
||||
static inline float distance_from_edge(int x, int max) {
|
||||
int dx = (max / 2) - x;
|
||||
if (dx < 0) dx = -dx;
|
||||
dx = (max/2) + 1 - dx;
|
||||
dx = (max / 2) + 1 - dx;
|
||||
dx *= 2;
|
||||
float dist = (float)dx/max;
|
||||
float dist = (float) dx / max;
|
||||
if (dist > 1) dist = 1;
|
||||
return dist;
|
||||
}
|
||||
|
||||
void load_data_blocking(load_args args);
|
||||
|
||||
|
||||
void print_letters(float *pred, int n);
|
||||
|
||||
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
|
||||
|
||||
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center);
|
||||
|
||||
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue,
|
||||
float saturation, float exposure);
|
||||
|
||||
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue,
|
||||
float saturation, float exposure);
|
||||
|
||||
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue,
|
||||
float saturation, float exposure, int center);
|
||||
|
||||
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center);
|
||||
data load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
|
||||
|
||||
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size,
|
||||
float angle, float aspect, float hue, float saturation, float exposure, int center);
|
||||
|
||||
data
|
||||
load_data_regression(char **paths, int n, int m, int classes, int min, int max, int size, float angle, float aspect,
|
||||
float hue, float saturation, float exposure);
|
||||
|
||||
data load_go(char *filename);
|
||||
|
||||
|
||||
data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h);
|
||||
|
||||
void get_random_batch(data d, int n, float *X, float *y);
|
||||
|
||||
data get_data_part(data d, int part, int total);
|
||||
|
||||
data get_random_data(data d, int num);
|
||||
|
||||
data load_categorical_data_csv(char *filename, int target, int k);
|
||||
|
||||
void normalize_data_rows(data d);
|
||||
|
||||
void scale_data_rows(data d, float s);
|
||||
|
||||
void translate_data_rows(data d, float s);
|
||||
|
||||
void randomize_data(data d);
|
||||
|
||||
data *split_data(data d, int part, int total);
|
||||
|
||||
data concat_datas(data *d, int n);
|
||||
|
||||
void fill_truth(char *path, char **labels, int k, float *truth);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
extern "C" {
|
||||
#include "convolutional_layer.h"
|
||||
#include "deconvolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
|
@ -11,127 +9,127 @@ extern "C" {
|
|||
#include "im2col.h"
|
||||
#include "col2im.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
}
|
||||
|
||||
extern "C" void forward_deconvolutional_layer_gpu(layer l, network net)
|
||||
{
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
void forward_deconvolutional_layer_gpu(layer l, network net) {
|
||||
int i;
|
||||
|
||||
int m = l.size*l.size*l.n;
|
||||
int n = l.h*l.w;
|
||||
int m = l.size * l.size * l.n;
|
||||
int n = l.h * l.w;
|
||||
int k = l.c;
|
||||
|
||||
fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);
|
||||
fill_gpu(l.outputs * l.batch, 0, l.output_gpu, 1);
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
for (i = 0; i < l.batch; ++i) {
|
||||
float *a = l.weights_gpu;
|
||||
float *b = net.input_gpu + i*l.c*l.h*l.w;
|
||||
float *b = net.input_gpu + i * l.c * l.h * l.w;
|
||||
float *c = net.workspace;
|
||||
|
||||
gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
||||
gemm_gpu(1, 0, m, n, k, 1, a, m, b, n, 0, c, n);
|
||||
|
||||
col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu+i*l.outputs);
|
||||
col2im_gpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output_gpu + i * l.outputs);
|
||||
}
|
||||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer_gpu(l, net);
|
||||
} else {
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w * l.out_h);
|
||||
}
|
||||
activate_array_gpu(l.output_gpu, l.batch*l.n*l.out_w*l.out_h, l.activation);
|
||||
activate_array_gpu(l.output_gpu, l.batch * l.n * l.out_w * l.out_h, l.activation);
|
||||
}
|
||||
|
||||
extern "C" void backward_deconvolutional_layer_gpu(layer l, network net)
|
||||
{
|
||||
void backward_deconvolutional_layer_gpu(layer l, network net) {
|
||||
int i;
|
||||
|
||||
//constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
|
||||
gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
|
||||
gradient_array_gpu(l.output_gpu, l.outputs * l.batch, l.activation, l.delta_gpu);
|
||||
|
||||
if(l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
backward_batchnorm_layer_gpu(l, net);
|
||||
} else {
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
|
||||
backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w * l.out_h);
|
||||
}
|
||||
|
||||
//if(net.delta_gpu) memset(net.delta_gpu, 0, l.batch*l.h*l.w*l.c*sizeof(float));
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
for (i = 0; i < l.batch; ++i) {
|
||||
int m = l.c;
|
||||
int n = l.size*l.size*l.n;
|
||||
int k = l.h*l.w;
|
||||
int n = l.size * l.size * l.n;
|
||||
int k = l.h * l.w;
|
||||
|
||||
float *a = net.input_gpu + i*m*k;
|
||||
float *a = net.input_gpu + i * m * k;
|
||||
float *b = net.workspace;
|
||||
float *c = l.weight_updates_gpu;
|
||||
|
||||
im2col_gpu(l.delta_gpu + i*l.outputs, l.out_c, l.out_h, l.out_w,
|
||||
l.size, l.stride, l.pad, b);
|
||||
gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
|
||||
im2col_gpu(l.delta_gpu + i * l.outputs, l.out_c, l.out_h, l.out_w,
|
||||
l.size, l.stride, l.pad, b);
|
||||
gemm_gpu(0, 1, m, n, k, 1, a, k, b, k, 1, c, n);
|
||||
|
||||
if(net.delta_gpu){
|
||||
if (net.delta_gpu) {
|
||||
int m = l.c;
|
||||
int n = l.h*l.w;
|
||||
int k = l.size*l.size*l.n;
|
||||
int n = l.h * l.w;
|
||||
int k = l.size * l.size * l.n;
|
||||
|
||||
float *a = l.weights_gpu;
|
||||
float *b = net.workspace;
|
||||
float *c = net.delta_gpu + i*n*m;
|
||||
float *c = net.delta_gpu + i * n * m;
|
||||
|
||||
gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
gemm_gpu(0, 0, m, n, k, 1, a, k, b, n, 1, c, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void pull_deconvolutional_layer(layer l)
|
||||
{
|
||||
cuda_pull_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
|
||||
void pull_deconvolutional_layer(layer l) {
|
||||
cuda_pull_array(l.weights_gpu, l.weights, l.c * l.n * l.size * l.size);
|
||||
cuda_pull_array(l.biases_gpu, l.biases, l.n);
|
||||
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
|
||||
cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.c * l.n * l.size * l.size);
|
||||
cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||
if (l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
cuda_pull_array(l.scales_gpu, l.scales, l.n);
|
||||
cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
|
||||
cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void push_deconvolutional_layer(layer l)
|
||||
{
|
||||
cuda_push_array(l.weights_gpu, l.weights, l.c*l.n*l.size*l.size);
|
||||
void push_deconvolutional_layer(layer l) {
|
||||
cuda_push_array(l.weights_gpu, l.weights, l.c * l.n * l.size * l.size);
|
||||
cuda_push_array(l.biases_gpu, l.biases, l.n);
|
||||
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c*l.n*l.size*l.size);
|
||||
cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.c * l.n * l.size * l.size);
|
||||
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
|
||||
if (l.batch_normalize){
|
||||
if (l.batch_normalize) {
|
||||
cuda_push_array(l.scales_gpu, l.scales, l.n);
|
||||
cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n);
|
||||
cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n);
|
||||
}
|
||||
}
|
||||
|
||||
void update_deconvolutional_layer_gpu(layer l, update_args a)
|
||||
{
|
||||
float learning_rate = a.learning_rate*l.learning_rate_scale;
|
||||
void update_deconvolutional_layer_gpu(layer l, update_args a) {
|
||||
float learning_rate = a.learning_rate * l.learning_rate_scale;
|
||||
float momentum = a.momentum;
|
||||
float decay = a.decay;
|
||||
int batch = a.batch;
|
||||
|
||||
if(a.adam){
|
||||
adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t);
|
||||
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t);
|
||||
if(l.scales_gpu){
|
||||
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.n, batch, a.t);
|
||||
if (a.adam) {
|
||||
adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate,
|
||||
l.nweights, batch, a.t);
|
||||
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay,
|
||||
learning_rate, l.n, batch, a.t);
|
||||
if (l.scales_gpu) {
|
||||
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay,
|
||||
learning_rate, l.n, batch, a.t);
|
||||
}
|
||||
}else{
|
||||
axpy_gpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
|
||||
axpy_gpu(l.nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
|
||||
} else {
|
||||
axpy_gpu(l.nweights, -decay * batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
|
||||
axpy_gpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
|
||||
scal_gpu(l.nweights, momentum, l.weight_updates_gpu, 1);
|
||||
|
||||
axpy_gpu(l.n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
|
||||
axpy_gpu(l.n, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
|
||||
scal_gpu(l.n, momentum, l.bias_updates_gpu, 1);
|
||||
|
||||
if(l.scales_gpu){
|
||||
axpy_gpu(l.n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
|
||||
if (l.scales_gpu) {
|
||||
axpy_gpu(l.n, learning_rate / batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
|
||||
scal_gpu(l.n, momentum, l.scale_updates_gpu, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,312 +0,0 @@
|
|||
#include "deconvolutional_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "utils.h"
|
||||
#include "im2col.h"
|
||||
#include "col2im.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
static size_t get_workspace_size(layer l){
|
||||
return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float);
|
||||
}
|
||||
|
||||
void bilinear_init(layer l)
|
||||
{
|
||||
int i,j,f;
|
||||
float center = (l.size-1) / 2.;
|
||||
for(f = 0; f < l.n; ++f){
|
||||
for(j = 0; j < l.size; ++j){
|
||||
for(i = 0; i < l.size; ++i){
|
||||
float val = (1 - fabs(i - center)) * (1 - fabs(j - center));
|
||||
int c = f%l.c;
|
||||
int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i;
|
||||
l.weights[ind] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam)
|
||||
{
|
||||
int i;
|
||||
layer l = {0};
|
||||
l.type = DECONVOLUTIONAL;
|
||||
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.n = n;
|
||||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.size = size;
|
||||
|
||||
l.nweights = c*n*size*size;
|
||||
l.nbiases = n;
|
||||
|
||||
l.weights = calloc(c*n*size*size, sizeof(float));
|
||||
l.weight_updates = calloc(c*n*size*size, sizeof(float));
|
||||
|
||||
l.biases = calloc(n, sizeof(float));
|
||||
l.bias_updates = calloc(n, sizeof(float));
|
||||
//float scale = n/(size*size*c);
|
||||
//printf("scale: %f\n", scale);
|
||||
float scale = .02;
|
||||
for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
|
||||
//bilinear_init(l);
|
||||
for(i = 0; i < n; ++i){
|
||||
l.biases[i] = 0;
|
||||
}
|
||||
l.pad = padding;
|
||||
|
||||
l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad;
|
||||
l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad;
|
||||
l.out_c = n;
|
||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
|
||||
scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1);
|
||||
|
||||
l.output = calloc(l.batch*l.outputs, sizeof(float));
|
||||
l.delta = calloc(l.batch*l.outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_deconvolutional_layer;
|
||||
l.backward = backward_deconvolutional_layer;
|
||||
l.update = update_deconvolutional_layer;
|
||||
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
if(batch_normalize){
|
||||
l.scales = calloc(n, sizeof(float));
|
||||
l.scale_updates = calloc(n, sizeof(float));
|
||||
for(i = 0; i < n; ++i){
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.mean = calloc(n, sizeof(float));
|
||||
l.variance = calloc(n, sizeof(float));
|
||||
|
||||
l.mean_delta = calloc(n, sizeof(float));
|
||||
l.variance_delta = calloc(n, sizeof(float));
|
||||
|
||||
l.rolling_mean = calloc(n, sizeof(float));
|
||||
l.rolling_variance = calloc(n, sizeof(float));
|
||||
l.x = calloc(l.batch*l.outputs, sizeof(float));
|
||||
l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
|
||||
}
|
||||
if(adam){
|
||||
l.m = calloc(c*n*size*size, sizeof(float));
|
||||
l.v = calloc(c*n*size*size, sizeof(float));
|
||||
l.bias_m = calloc(n, sizeof(float));
|
||||
l.scale_m = calloc(n, sizeof(float));
|
||||
l.bias_v = calloc(n, sizeof(float));
|
||||
l.scale_v = calloc(n, sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_deconvolutional_layer_gpu;
|
||||
l.backward_gpu = backward_deconvolutional_layer_gpu;
|
||||
l.update_gpu = update_deconvolutional_layer_gpu;
|
||||
|
||||
if(gpu_index >= 0){
|
||||
|
||||
if (adam) {
|
||||
l.m_gpu = cuda_make_array(l.m, c*n*size*size);
|
||||
l.v_gpu = cuda_make_array(l.v, c*n*size*size);
|
||||
l.bias_m_gpu = cuda_make_array(l.bias_m, n);
|
||||
l.bias_v_gpu = cuda_make_array(l.bias_v, n);
|
||||
l.scale_m_gpu = cuda_make_array(l.scale_m, n);
|
||||
l.scale_v_gpu = cuda_make_array(l.scale_v, n);
|
||||
}
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
|
||||
|
||||
if(batch_normalize){
|
||||
l.mean_gpu = cuda_make_array(0, n);
|
||||
l.variance_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.rolling_mean_gpu = cuda_make_array(0, n);
|
||||
l.rolling_variance_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.mean_delta_gpu = cuda_make_array(0, n);
|
||||
l.variance_delta_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n);
|
||||
l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n);
|
||||
}
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
cudnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
l.activation = activation;
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
|
||||
fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
void denormalize_deconvolutional_layer(layer l)
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < l.n; ++i){
|
||||
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
|
||||
for(j = 0; j < l.c*l.size*l.size; ++j){
|
||||
l.weights[i*l.c*l.size*l.size + j] *= scale;
|
||||
}
|
||||
l.biases[i] -= l.rolling_mean[i] * scale;
|
||||
l.scales[i] = 1;
|
||||
l.rolling_mean[i] = 0;
|
||||
l.rolling_variance[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void resize_deconvolutional_layer(layer *l, int h, int w)
|
||||
{
|
||||
l->h = h;
|
||||
l->w = w;
|
||||
l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad;
|
||||
l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad;
|
||||
|
||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||
l->inputs = l->w * l->h * l->c;
|
||||
|
||||
l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
|
||||
l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
|
||||
if(l->batch_normalize){
|
||||
l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
|
||||
l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
cuda_free(l->delta_gpu);
|
||||
cuda_free(l->output_gpu);
|
||||
|
||||
l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
|
||||
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
|
||||
if(l->batch_normalize){
|
||||
cuda_free(l->x_gpu);
|
||||
cuda_free(l->x_norm_gpu);
|
||||
|
||||
l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
#endif
|
||||
#endif
|
||||
l->workspace_size = get_workspace_size(*l);
|
||||
}
|
||||
|
||||
void forward_deconvolutional_layer(const layer l, network net)
|
||||
{
|
||||
int i;
|
||||
|
||||
int m = l.size*l.size*l.n;
|
||||
int n = l.h*l.w;
|
||||
int k = l.c;
|
||||
|
||||
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
float *a = l.weights;
|
||||
float *b = net.input + i*l.c*l.h*l.w;
|
||||
float *c = net.workspace;
|
||||
|
||||
gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
|
||||
|
||||
col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs);
|
||||
}
|
||||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
activate_array(l.output, l.batch*l.n*l.out_w*l.out_h, l.activation);
|
||||
}
|
||||
|
||||
void backward_deconvolutional_layer(layer l, network net)
|
||||
{
|
||||
int i;
|
||||
|
||||
gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
|
||||
|
||||
if(l.batch_normalize){
|
||||
backward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
|
||||
}
|
||||
|
||||
//if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
|
||||
|
||||
for(i = 0; i < l.batch; ++i){
|
||||
int m = l.c;
|
||||
int n = l.size*l.size*l.n;
|
||||
int k = l.h*l.w;
|
||||
|
||||
float *a = net.input + i*m*k;
|
||||
float *b = net.workspace;
|
||||
float *c = l.weight_updates;
|
||||
|
||||
im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w,
|
||||
l.size, l.stride, l.pad, b);
|
||||
gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
|
||||
|
||||
if(net.delta){
|
||||
int m = l.c;
|
||||
int n = l.h*l.w;
|
||||
int k = l.size*l.size*l.n;
|
||||
|
||||
float *a = l.weights;
|
||||
float *b = net.workspace;
|
||||
float *c = net.delta + i*n*m;
|
||||
|
||||
gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_deconvolutional_layer(layer l, update_args a)
|
||||
{
|
||||
float learning_rate = a.learning_rate*l.learning_rate_scale;
|
||||
float momentum = a.momentum;
|
||||
float decay = a.decay;
|
||||
int batch = a.batch;
|
||||
|
||||
int size = l.size*l.size*l.c*l.n;
|
||||
axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
|
||||
scal_cpu(l.n, momentum, l.bias_updates, 1);
|
||||
|
||||
if(l.scales){
|
||||
axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
|
||||
scal_cpu(l.n, momentum, l.scale_updates, 1);
|
||||
}
|
||||
|
||||
axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
|
||||
scal_cpu(size, momentum, l.weight_updates, 1);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,307 @@
|
|||
#include "deconvolutional_layer.h"
|
||||
#include "convolutional_layer.h"
|
||||
#include "batchnorm_layer.h"
|
||||
#include "utils.h"
|
||||
#include "im2col.h"
|
||||
#include "col2im.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
static size_t get_workspace_size(layer l) {
|
||||
return (size_t) l.h * l.w * l.size * l.size * l.n * sizeof(float);
|
||||
}
|
||||
|
||||
void bilinear_init(layer l) {
|
||||
int i, j, f;
|
||||
float center = (l.size - 1) / 2.;
|
||||
for (f = 0; f < l.n; ++f) {
|
||||
for (j = 0; j < l.size; ++j) {
|
||||
for (i = 0; i < l.size; ++i) {
|
||||
float val = (1 - fabs(i - center)) * (1 - fabs(j - center));
|
||||
int c = f % l.c;
|
||||
int ind = f * l.size * l.size * l.c + c * l.size * l.size + j * l.size + i;
|
||||
l.weights[ind] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding,
|
||||
ACTIVATION activation, int batch_normalize, int adam) {
|
||||
int i;
|
||||
layer l = {(LAYER_TYPE) 0};
|
||||
l.type = DECONVOLUTIONAL;
|
||||
|
||||
l.h = h;
|
||||
l.w = w;
|
||||
l.c = c;
|
||||
l.n = n;
|
||||
l.batch = batch;
|
||||
l.stride = stride;
|
||||
l.size = size;
|
||||
|
||||
l.nweights = c * n * size * size;
|
||||
l.nbiases = n;
|
||||
|
||||
l.weights = (float *) calloc(c * n * size * size, sizeof(float));
|
||||
l.weight_updates = (float *) calloc(c * n * size * size, sizeof(float));
|
||||
|
||||
l.biases = (float *) calloc(n, sizeof(float));
|
||||
l.bias_updates = (float *) calloc(n, sizeof(float));
|
||||
//float scale = n/(size*size*c);
|
||||
//printf("scale: %f\n", scale);
|
||||
float scale = .02;
|
||||
for (i = 0; i < c * n * size * size; ++i) l.weights[i] = scale * rand_normal();
|
||||
//bilinear_init(l);
|
||||
for (i = 0; i < n; ++i) {
|
||||
l.biases[i] = 0;
|
||||
}
|
||||
l.pad = padding;
|
||||
|
||||
l.out_h = (l.h - 1) * l.stride + l.size - 2 * l.pad;
|
||||
l.out_w = (l.w - 1) * l.stride + l.size - 2 * l.pad;
|
||||
l.out_c = n;
|
||||
l.outputs = l.out_w * l.out_h * l.out_c;
|
||||
l.inputs = l.w * l.h * l.c;
|
||||
|
||||
scal_cpu(l.nweights, (float) l.out_w * l.out_h / (l.w * l.h), l.weights, 1);
|
||||
|
||||
l.output = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
l.delta = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_deconvolutional_layer;
|
||||
l.backward = backward_deconvolutional_layer;
|
||||
l.update = update_deconvolutional_layer;
|
||||
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
if (batch_normalize) {
|
||||
l.scales = (float *) calloc(n, sizeof(float));
|
||||
l.scale_updates = (float *) calloc(n, sizeof(float));
|
||||
for (i = 0; i < n; ++i) {
|
||||
l.scales[i] = 1;
|
||||
}
|
||||
|
||||
l.mean = (float *) calloc(n, sizeof(float));
|
||||
l.variance = (float *) calloc(n, sizeof(float));
|
||||
|
||||
l.mean_delta = (float *) calloc(n, sizeof(float));
|
||||
l.variance_delta = (float *) calloc(n, sizeof(float));
|
||||
|
||||
l.rolling_mean = (float *) calloc(n, sizeof(float));
|
||||
l.rolling_variance = (float *) calloc(n, sizeof(float));
|
||||
l.x = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
l.x_norm = (float *) calloc(l.batch * l.outputs, sizeof(float));
|
||||
}
|
||||
if (adam) {
|
||||
l.m = (float *) calloc(c * n * size * size, sizeof(float));
|
||||
l.v = (float *) calloc(c * n * size * size, sizeof(float));
|
||||
l.bias_m = (float *) calloc(n, sizeof(float));
|
||||
l.scale_m = (float *) calloc(n, sizeof(float));
|
||||
l.bias_v = (float *) calloc(n, sizeof(float));
|
||||
l.scale_v = (float *) calloc(n, sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_deconvolutional_layer_gpu;
|
||||
l.backward_gpu = backward_deconvolutional_layer_gpu;
|
||||
l.update_gpu = update_deconvolutional_layer_gpu;
|
||||
|
||||
if(gpu_index >= 0){
|
||||
|
||||
if (adam) {
|
||||
l.m_gpu = cuda_make_array(l.m, c*n*size*size);
|
||||
l.v_gpu = cuda_make_array(l.v, c*n*size*size);
|
||||
l.bias_m_gpu = cuda_make_array(l.bias_m, n);
|
||||
l.bias_v_gpu = cuda_make_array(l.bias_v, n);
|
||||
l.scale_m_gpu = cuda_make_array(l.scale_m, n);
|
||||
l.scale_v_gpu = cuda_make_array(l.scale_v, n);
|
||||
}
|
||||
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
|
||||
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
|
||||
|
||||
l.biases_gpu = cuda_make_array(l.biases, n);
|
||||
l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
|
||||
|
||||
l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n);
|
||||
l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
|
||||
|
||||
if(batch_normalize){
|
||||
l.mean_gpu = cuda_make_array(0, n);
|
||||
l.variance_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.rolling_mean_gpu = cuda_make_array(0, n);
|
||||
l.rolling_variance_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.mean_delta_gpu = cuda_make_array(0, n);
|
||||
l.variance_delta_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.scales_gpu = cuda_make_array(l.scales, n);
|
||||
l.scale_updates_gpu = cuda_make_array(0, n);
|
||||
|
||||
l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n);
|
||||
l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n);
|
||||
}
|
||||
}
|
||||
#ifdef CUDNN
|
||||
hipdnnCreateTensorDescriptor(&l.dstTensorDesc);
|
||||
hipdnnCreateTensorDescriptor(&l.normTensorDesc);
|
||||
hipdnnSetTensor4dDescriptor(l.dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.normTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
l.activation = activation;
|
||||
l.workspace_size = get_workspace_size(l);
|
||||
|
||||
fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c,
|
||||
l.out_w, l.out_h, l.out_c);
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
void denormalize_deconvolutional_layer(layer l) {
|
||||
int i, j;
|
||||
for (i = 0; i < l.n; ++i) {
|
||||
float scale = l.scales[i] / sqrt(l.rolling_variance[i] + .00001);
|
||||
for (j = 0; j < l.c * l.size * l.size; ++j) {
|
||||
l.weights[i * l.c * l.size * l.size + j] *= scale;
|
||||
}
|
||||
l.biases[i] -= l.rolling_mean[i] * scale;
|
||||
l.scales[i] = 1;
|
||||
l.rolling_mean[i] = 0;
|
||||
l.rolling_variance[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void resize_deconvolutional_layer(layer *l, int h, int w) {
|
||||
l->h = h;
|
||||
l->w = w;
|
||||
l->out_h = (l->h - 1) * l->stride + l->size - 2 * l->pad;
|
||||
l->out_w = (l->w - 1) * l->stride + l->size - 2 * l->pad;
|
||||
|
||||
l->outputs = l->out_h * l->out_w * l->out_c;
|
||||
l->inputs = l->w * l->h * l->c;
|
||||
|
||||
l->output = (float *) realloc(l->output, l->batch * l->outputs * sizeof(float));
|
||||
l->delta = (float *) realloc(l->delta, l->batch * l->outputs * sizeof(float));
|
||||
if (l->batch_normalize) {
|
||||
l->x = (float *) realloc(l->x, l->batch * l->outputs * sizeof(float));
|
||||
l->x_norm = (float *) realloc(l->x_norm, l->batch * l->outputs * sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
cuda_free(l->delta_gpu);
|
||||
cuda_free(l->output_gpu);
|
||||
|
||||
l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
|
||||
l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
|
||||
if(l->batch_normalize){
|
||||
cuda_free(l->x_gpu);
|
||||
cuda_free(l->x_norm_gpu);
|
||||
|
||||
l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
|
||||
}
|
||||
#ifdef CUDNN
|
||||
hipdnnSetTensor4dDescriptor(l->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l->normTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
|
||||
#endif
|
||||
#endif
|
||||
l->workspace_size = get_workspace_size(*l);
|
||||
}
|
||||
|
||||
void forward_deconvolutional_layer(const layer l, network net) {
|
||||
int i;
|
||||
|
||||
int m = l.size * l.size * l.n;
|
||||
int n = l.h * l.w;
|
||||
int k = l.c;
|
||||
|
||||
fill_cpu(l.outputs * l.batch, 0, l.output, 1);
|
||||
|
||||
for (i = 0; i < l.batch; ++i) {
|
||||
float *a = l.weights;
|
||||
float *b = net.input + i * l.c * l.h * l.w;
|
||||
float *c = net.workspace;
|
||||
|
||||
gemm_cpu(1, 0, m, n, k, 1, a, m, b, n, 0, c, n);
|
||||
|
||||
col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output + i * l.outputs);
|
||||
}
|
||||
if (l.batch_normalize) {
|
||||
forward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
add_bias(l.output, l.biases, l.batch, l.n, l.out_w * l.out_h);
|
||||
}
|
||||
activate_array(l.output, l.batch * l.n * l.out_w * l.out_h, l.activation);
|
||||
}
|
||||
|
||||
void backward_deconvolutional_layer(layer l, network net) {
|
||||
int i;
|
||||
|
||||
gradient_array(l.output, l.outputs * l.batch, l.activation, l.delta);
|
||||
|
||||
if (l.batch_normalize) {
|
||||
backward_batchnorm_layer(l, net);
|
||||
} else {
|
||||
backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w * l.out_h);
|
||||
}
|
||||
|
||||
//if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
|
||||
|
||||
for (i = 0; i < l.batch; ++i) {
|
||||
int m = l.c;
|
||||
int n = l.size * l.size * l.n;
|
||||
int k = l.h * l.w;
|
||||
|
||||
float *a = net.input + i * m * k;
|
||||
float *b = net.workspace;
|
||||
float *c = l.weight_updates;
|
||||
|
||||
im2col_cpu(l.delta + i * l.outputs, l.out_c, l.out_h, l.out_w,
|
||||
l.size, l.stride, l.pad, b);
|
||||
gemm_cpu(0, 1, m, n, k, 1, a, k, b, k, 1, c, n);
|
||||
|
||||
if (net.delta) {
|
||||
int m = l.c;
|
||||
int n = l.h * l.w;
|
||||
int k = l.size * l.size * l.n;
|
||||
|
||||
float *a = l.weights;
|
||||
float *b = net.workspace;
|
||||
float *c = net.delta + i * n * m;
|
||||
|
||||
gemm_cpu(0, 0, m, n, k, 1, a, k, b, n, 1, c, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_deconvolutional_layer(layer l, update_args a) {
|
||||
float learning_rate = a.learning_rate * l.learning_rate_scale;
|
||||
float momentum = a.momentum;
|
||||
float decay = a.decay;
|
||||
int batch = a.batch;
|
||||
|
||||
int size = l.size * l.size * l.c * l.n;
|
||||
axpy_cpu(l.n, learning_rate / batch, l.bias_updates, 1, l.biases, 1);
|
||||
scal_cpu(l.n, momentum, l.bias_updates, 1);
|
||||
|
||||
if (l.scales) {
|
||||
axpy_cpu(l.n, learning_rate / batch, l.scale_updates, 1, l.scales, 1);
|
||||
scal_cpu(l.n, momentum, l.scale_updates, 1);
|
||||
}
|
||||
|
||||
axpy_cpu(size, -decay * batch, l.weights, 1, l.weight_updates, 1);
|
||||
axpy_cpu(size, learning_rate / batch, l.weight_updates, 1, l.weights, 1);
|
||||
scal_cpu(size, momentum, l.weight_updates, 1);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1,13 +1,14 @@
|
|||
#ifndef DECONVOLUTIONAL_LAYER_H
|
||||
#define DECONVOLUTIONAL_LAYER_H
|
||||
|
||||
#include "cuda.h"
|
||||
#include "image.h"
|
||||
#include "activations.h"
|
||||
#include "layer.h"
|
||||
#include "network.h"
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
void forward_deconvolutional_layer_gpu(layer l, network net);
|
||||
void backward_deconvolutional_layer_gpu(layer l, network net);
|
||||
void update_deconvolutional_layer_gpu(layer l, update_args a);
|
||||
|
@ -15,10 +16,15 @@ void push_deconvolutional_layer(layer l);
|
|||
void pull_deconvolutional_layer(layer l);
|
||||
#endif
|
||||
|
||||
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam);
|
||||
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding,
|
||||
ACTIVATION activation, int batch_normalize, int adam);
|
||||
|
||||
void resize_deconvolutional_layer(layer *l, int h, int w);
|
||||
|
||||
void forward_deconvolutional_layer(const layer l, network net);
|
||||
|
||||
void update_deconvolutional_layer(layer l, update_args a);
|
||||
|
||||
void backward_deconvolutional_layer(layer l, network net);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -203,11 +203,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
|||
|
||||
int i;
|
||||
demo_total = size_network(net);
|
||||
predictions = calloc(demo_frame, sizeof(float*));
|
||||
predictions = (float **) calloc(demo_frame, sizeof(float*));
|
||||
for (i = 0; i < demo_frame; ++i){
|
||||
predictions[i] = calloc(demo_total, sizeof(float));
|
||||
predictions[i] = (float *) calloc(demo_total, sizeof(float));
|
||||
}
|
||||
avg = calloc(demo_total, sizeof(float));
|
||||
avg = (float *) calloc(demo_total, sizeof(float));
|
||||
|
||||
if(filename){
|
||||
printf("video file: %s\n", filename);
|
||||
|
@ -255,7 +255,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
|
|||
void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen)
|
||||
{
|
||||
demo_frame = avg_frames;
|
||||
predictions = calloc(demo_frame, sizeof(float*));
|
||||
predictions = (float**)calloc(demo_frame, sizeof(float*));
|
||||
image **alphabet = load_alphabet();
|
||||
demo_names = names;
|
||||
demo_alphabet = alphabet;
|
||||
|
@ -341,9 +341,11 @@ pthread_join(detect_thread, 0);
|
|||
}
|
||||
*/
|
||||
#else
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen)
|
||||
{
|
||||
|
||||
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes,
|
||||
int delay, char *prefix, int avg, float hier, int w, int h, int frames, int fullscreen) {
|
||||
fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -3,7 +3,6 @@
|
|||
#include "softmax_layer.h"
|
||||
#include "blas.h"
|
||||
#include "box.h"
|
||||
#include "cuda.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -11,9 +10,12 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore)
|
||||
{
|
||||
detection_layer l = {0};
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) {
|
||||
detection_layer l = {(LAYER_TYPE)0};
|
||||
l.type = DETECTION;
|
||||
|
||||
l.n = n;
|
||||
|
@ -25,12 +27,12 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
|
|||
l.side = side;
|
||||
l.w = side;
|
||||
l.h = side;
|
||||
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
|
||||
l.cost = calloc(1, sizeof(float));
|
||||
assert(side * side * ((1 + l.coords) * l.n + l.classes) == inputs);
|
||||
l.cost = (float *) calloc(1, sizeof(float));
|
||||
l.outputs = l.inputs;
|
||||
l.truths = l.side*l.side*(1+l.coords+l.classes);
|
||||
l.output = calloc(batch*l.outputs, sizeof(float));
|
||||
l.delta = calloc(batch*l.outputs, sizeof(float));
|
||||
l.truths = l.side * l.side * (1 + l.coords + l.classes);
|
||||
l.output = (float *) calloc(batch * l.outputs, sizeof(float));
|
||||
l.delta = (float *) calloc(batch * l.outputs, sizeof(float));
|
||||
|
||||
l.forward = forward_detection_layer;
|
||||
l.backward = backward_detection_layer;
|
||||
|
@ -47,24 +49,23 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
|
|||
return l;
|
||||
}
|
||||
|
||||
void forward_detection_layer(const detection_layer l, network net)
|
||||
{
|
||||
int locations = l.side*l.side;
|
||||
int i,j;
|
||||
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
|
||||
void forward_detection_layer(const detection_layer l, network net) {
|
||||
int locations = l.side * l.side;
|
||||
int i, j;
|
||||
memcpy(l.output, net.input, l.outputs * l.batch * sizeof(float));
|
||||
//if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
|
||||
int b;
|
||||
if (l.softmax){
|
||||
for(b = 0; b < l.batch; ++b){
|
||||
int index = b*l.inputs;
|
||||
if (l.softmax) {
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
int index = b * l.inputs;
|
||||
for (i = 0; i < locations; ++i) {
|
||||
int offset = i*l.classes;
|
||||
int offset = i * l.classes;
|
||||
softmax(l.output + index + offset, l.classes, 1, 1,
|
||||
l.output + index + offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(net.train){
|
||||
if (net.train) {
|
||||
float avg_iou = 0;
|
||||
float avg_cat = 0;
|
||||
float avg_allcat = 0;
|
||||
|
@ -74,15 +75,15 @@ void forward_detection_layer(const detection_layer l, network net)
|
|||
*(l.cost) = 0;
|
||||
int size = l.inputs * l.batch;
|
||||
memset(l.delta, 0, size * sizeof(float));
|
||||
for (b = 0; b < l.batch; ++b){
|
||||
int index = b*l.inputs;
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
int index = b * l.inputs;
|
||||
for (i = 0; i < locations; ++i) {
|
||||
int truth_index = (b*locations + i)*(1+l.coords+l.classes);
|
||||
int truth_index = (b * locations + i) * (1 + l.coords + l.classes);
|
||||
int is_obj = net.truth[truth_index];
|
||||
for (j = 0; j < l.n; ++j) {
|
||||
int p_index = index + locations*l.classes + i*l.n + j;
|
||||
l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
|
||||
*(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
|
||||
int p_index = index + locations * l.classes + i * l.n + j;
|
||||
l.delta[p_index] = l.noobject_scale * (0 - l.output[p_index]);
|
||||
*(l.cost) += l.noobject_scale * pow(l.output[p_index], 2);
|
||||
avg_anyobj += l.output[p_index];
|
||||
}
|
||||
|
||||
|
@ -90,118 +91,121 @@ void forward_detection_layer(const detection_layer l, network net)
|
|||
float best_iou = 0;
|
||||
float best_rmse = 20;
|
||||
|
||||
if (!is_obj){
|
||||
if (!is_obj) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int class_index = index + i*l.classes;
|
||||
for(j = 0; j < l.classes; ++j) {
|
||||
l.delta[class_index+j] = l.class_scale * (net.truth[truth_index+1+j] - l.output[class_index+j]);
|
||||
*(l.cost) += l.class_scale * pow(net.truth[truth_index+1+j] - l.output[class_index+j], 2);
|
||||
if(net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
|
||||
avg_allcat += l.output[class_index+j];
|
||||
int class_index = index + i * l.classes;
|
||||
for (j = 0; j < l.classes; ++j) {
|
||||
l.delta[class_index + j] =
|
||||
l.class_scale * (net.truth[truth_index + 1 + j] - l.output[class_index + j]);
|
||||
*(l.cost) += l.class_scale * pow(net.truth[truth_index + 1 + j] - l.output[class_index + j], 2);
|
||||
if (net.truth[truth_index + 1 + j]) avg_cat += l.output[class_index + j];
|
||||
avg_allcat += l.output[class_index + j];
|
||||
}
|
||||
|
||||
box truth = float_to_box(net.truth + truth_index + 1 + l.classes, 1);
|
||||
truth.x /= l.side;
|
||||
truth.y /= l.side;
|
||||
|
||||
for(j = 0; j < l.n; ++j){
|
||||
int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
|
||||
for (j = 0; j < l.n; ++j) {
|
||||
int box_index = index + locations * (l.classes + l.n) + (i * l.n + j) * l.coords;
|
||||
box out = float_to_box(l.output + box_index, 1);
|
||||
out.x /= l.side;
|
||||
out.y /= l.side;
|
||||
|
||||
if (l.sqrt){
|
||||
out.w = out.w*out.w;
|
||||
out.h = out.h*out.h;
|
||||
if (l.sqrt) {
|
||||
out.w = out.w * out.w;
|
||||
out.h = out.h * out.h;
|
||||
}
|
||||
|
||||
float iou = box_iou(out, truth);
|
||||
float iou = box_iou(out, truth);
|
||||
//iou = 0;
|
||||
float rmse = box_rmse(out, truth);
|
||||
if(best_iou > 0 || iou > 0){
|
||||
if(iou > best_iou){
|
||||
if (best_iou > 0 || iou > 0) {
|
||||
if (iou > best_iou) {
|
||||
best_iou = iou;
|
||||
best_index = j;
|
||||
}
|
||||
}else{
|
||||
if(rmse < best_rmse){
|
||||
} else {
|
||||
if (rmse < best_rmse) {
|
||||
best_rmse = rmse;
|
||||
best_index = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(l.forced){
|
||||
if(truth.w*truth.h < .1){
|
||||
if (l.forced) {
|
||||
if (truth.w * truth.h < .1) {
|
||||
best_index = 1;
|
||||
}else{
|
||||
} else {
|
||||
best_index = 0;
|
||||
}
|
||||
}
|
||||
if(l.random && *(net.seen) < 64000){
|
||||
best_index = rand()%l.n;
|
||||
if (l.random && *(net.seen) < 64000) {
|
||||
best_index = rand() % l.n;
|
||||
}
|
||||
|
||||
int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
|
||||
int box_index = index + locations * (l.classes + l.n) + (i * l.n + best_index) * l.coords;
|
||||
int tbox_index = truth_index + 1 + l.classes;
|
||||
|
||||
box out = float_to_box(l.output + box_index, 1);
|
||||
out.x /= l.side;
|
||||
out.y /= l.side;
|
||||
if (l.sqrt) {
|
||||
out.w = out.w*out.w;
|
||||
out.h = out.h*out.h;
|
||||
out.w = out.w * out.w;
|
||||
out.h = out.h * out.h;
|
||||
}
|
||||
float iou = box_iou(out, truth);
|
||||
float iou = box_iou(out, truth);
|
||||
|
||||
//printf("%d,", best_index);
|
||||
int p_index = index + locations*l.classes + i*l.n + best_index;
|
||||
int p_index = index + locations * l.classes + i * l.n + best_index;
|
||||
*(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
|
||||
*(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
|
||||
*(l.cost) += l.object_scale * pow(1 - l.output[p_index], 2);
|
||||
avg_obj += l.output[p_index];
|
||||
l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);
|
||||
l.delta[p_index] = l.object_scale * (1. - l.output[p_index]);
|
||||
|
||||
if(l.rescore){
|
||||
if (l.rescore) {
|
||||
l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
|
||||
}
|
||||
|
||||
l.delta[box_index+0] = l.coord_scale*(net.truth[tbox_index + 0] - l.output[box_index + 0]);
|
||||
l.delta[box_index+1] = l.coord_scale*(net.truth[tbox_index + 1] - l.output[box_index + 1]);
|
||||
l.delta[box_index+2] = l.coord_scale*(net.truth[tbox_index + 2] - l.output[box_index + 2]);
|
||||
l.delta[box_index+3] = l.coord_scale*(net.truth[tbox_index + 3] - l.output[box_index + 3]);
|
||||
if(l.sqrt){
|
||||
l.delta[box_index+2] = l.coord_scale*(sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]);
|
||||
l.delta[box_index+3] = l.coord_scale*(sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]);
|
||||
l.delta[box_index + 0] = l.coord_scale * (net.truth[tbox_index + 0] - l.output[box_index + 0]);
|
||||
l.delta[box_index + 1] = l.coord_scale * (net.truth[tbox_index + 1] - l.output[box_index + 1]);
|
||||
l.delta[box_index + 2] = l.coord_scale * (net.truth[tbox_index + 2] - l.output[box_index + 2]);
|
||||
l.delta[box_index + 3] = l.coord_scale * (net.truth[tbox_index + 3] - l.output[box_index + 3]);
|
||||
if (l.sqrt) {
|
||||
l.delta[box_index + 2] =
|
||||
l.coord_scale * (sqrt(net.truth[tbox_index + 2]) - l.output[box_index + 2]);
|
||||
l.delta[box_index + 3] =
|
||||
l.coord_scale * (sqrt(net.truth[tbox_index + 3]) - l.output[box_index + 3]);
|
||||
}
|
||||
|
||||
*(l.cost) += pow(1-iou, 2);
|
||||
*(l.cost) += pow(1 - iou, 2);
|
||||
avg_iou += iou;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
if(0){
|
||||
float *costs = calloc(l.batch*locations*l.n, sizeof(float));
|
||||
if (0) {
|
||||
float *costs = (float *) calloc(l.batch * locations * l.n, sizeof(float));
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
int index = b*l.inputs;
|
||||
int index = b * l.inputs;
|
||||
for (i = 0; i < locations; ++i) {
|
||||
for (j = 0; j < l.n; ++j) {
|
||||
int p_index = index + locations*l.classes + i*l.n + j;
|
||||
costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index];
|
||||
int p_index = index + locations * l.classes + i * l.n + j;
|
||||
costs[b * locations * l.n + i * l.n + j] = l.delta[p_index] * l.delta[p_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
int indexes[100];
|
||||
top_k(costs, l.batch*locations*l.n, 100, indexes);
|
||||
top_k(costs, l.batch * locations * l.n, 100, indexes);
|
||||
float cutoff = costs[indexes[99]];
|
||||
for (b = 0; b < l.batch; ++b) {
|
||||
int index = b*l.inputs;
|
||||
int index = b * l.inputs;
|
||||
for (i = 0; i < locations; ++i) {
|
||||
for (j = 0; j < l.n; ++j) {
|
||||
int p_index = index + locations*l.classes + i*l.n + j;
|
||||
if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0;
|
||||
int p_index = index + locations * l.classes + i * l.n + j;
|
||||
if (l.delta[p_index] * l.delta[p_index] < cutoff) l.delta[p_index] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -212,39 +216,39 @@ void forward_detection_layer(const detection_layer l, network net)
|
|||
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
|
||||
|
||||
|
||||
printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
|
||||
printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n",
|
||||
avg_iou / count, avg_cat / count, avg_allcat / (count * l.classes), avg_obj / count,
|
||||
avg_anyobj / (l.batch * locations * l.n), count);
|
||||
//if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_detection_layer(const detection_layer l, network net)
|
||||
{
|
||||
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1);
|
||||
void backward_detection_layer(const detection_layer l, network net) {
|
||||
axpy_cpu(l.batch * l.inputs, 1, l.delta, 1, net.delta, 1);
|
||||
}
|
||||
|
||||
void get_detection_detections(layer l, int w, int h, float thresh, detection *dets)
|
||||
{
|
||||
int i,j,n;
|
||||
void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) {
|
||||
int i, j, n;
|
||||
float *predictions = l.output;
|
||||
//int per_cell = 5*num+classes;
|
||||
for (i = 0; i < l.side*l.side; ++i){
|
||||
for (i = 0; i < l.side * l.side; ++i) {
|
||||
int row = i / l.side;
|
||||
int col = i % l.side;
|
||||
for(n = 0; n < l.n; ++n){
|
||||
int index = i*l.n + n;
|
||||
int p_index = l.side*l.side*l.classes + i*l.n + n;
|
||||
for (n = 0; n < l.n; ++n) {
|
||||
int index = i * l.n + n;
|
||||
int p_index = l.side * l.side * l.classes + i * l.n + n;
|
||||
float scale = predictions[p_index];
|
||||
int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4;
|
||||
int box_index = l.side * l.side * (l.classes + l.n) + (i * l.n + n) * 4;
|
||||
box b;
|
||||
b.x = (predictions[box_index + 0] + col) / l.side * w;
|
||||
b.y = (predictions[box_index + 1] + row) / l.side * h;
|
||||
b.w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w;
|
||||
b.h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h;
|
||||
b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w;
|
||||
b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h;
|
||||
dets[index].bbox = b;
|
||||
dets[index].objectness = scale;
|
||||
for(j = 0; j < l.classes; ++j){
|
||||
int class_index = i*l.classes;
|
||||
float prob = scale*predictions[class_index+j];
|
||||
for (j = 0; j < l.classes; ++j) {
|
||||
int class_index = i * l.classes;
|
||||
float prob = scale * predictions[class_index + j];
|
||||
dets[index].prob[j] = (prob > thresh) ? prob : 0;
|
||||
}
|
||||
}
|
|
@ -7,7 +7,9 @@
|
|||
typedef layer detection_layer;
|
||||
|
||||
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
|
||||
|
||||
void forward_detection_layer(const detection_layer l, network net);
|
||||
|
||||
void backward_detection_layer(const detection_layer l, network net);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -1,59 +1,58 @@
|
|||
#include "dropout_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
dropout_layer make_dropout_layer(int batch, int inputs, float probability)
|
||||
{
|
||||
dropout_layer l = {0};
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
dropout_layer make_dropout_layer(int batch, int inputs, float probability) {
|
||||
dropout_layer l = {(LAYER_TYPE) 0};
|
||||
l.type = DROPOUT;
|
||||
l.probability = probability;
|
||||
l.inputs = inputs;
|
||||
l.outputs = inputs;
|
||||
l.batch = batch;
|
||||
l.rand = calloc(inputs*batch, sizeof(float));
|
||||
l.scale = 1./(1.-probability);
|
||||
l.rand = (float *) calloc(inputs * batch, sizeof(float));
|
||||
l.scale = 1. / (1. - probability);
|
||||
l.forward = forward_dropout_layer;
|
||||
l.backward = backward_dropout_layer;
|
||||
#ifdef GPU
|
||||
#ifdef GPU
|
||||
l.forward_gpu = forward_dropout_layer_gpu;
|
||||
l.backward_gpu = backward_dropout_layer_gpu;
|
||||
l.rand_gpu = cuda_make_array(l.rand, inputs*batch);
|
||||
#endif
|
||||
#endif
|
||||
fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs);
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
void resize_dropout_layer(dropout_layer *l, int inputs)
|
||||
{
|
||||
l->rand = realloc(l->rand, l->inputs*l->batch*sizeof(float));
|
||||
#ifdef GPU
|
||||
void resize_dropout_layer(dropout_layer *l, int inputs) {
|
||||
l->rand = (float *) realloc(l->rand, l->inputs * l->batch * sizeof(float));
|
||||
#ifdef GPU
|
||||
cuda_free(l->rand_gpu);
|
||||
|
||||
l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void forward_dropout_layer(dropout_layer l, network net)
|
||||
{
|
||||
void forward_dropout_layer(dropout_layer l, network net) {
|
||||
int i;
|
||||
if (!net.train) return;
|
||||
for(i = 0; i < l.batch * l.inputs; ++i){
|
||||
for (i = 0; i < l.batch * l.inputs; ++i) {
|
||||
float r = rand_uniform(0, 1);
|
||||
l.rand[i] = r;
|
||||
if(r < l.probability) net.input[i] = 0;
|
||||
if (r < l.probability) net.input[i] = 0;
|
||||
else net.input[i] *= l.scale;
|
||||
}
|
||||
}
|
||||
|
||||
void backward_dropout_layer(dropout_layer l, network net)
|
||||
{
|
||||
void backward_dropout_layer(dropout_layer l, network net) {
|
||||
int i;
|
||||
if(!net.delta) return;
|
||||
for(i = 0; i < l.batch * l.inputs; ++i){
|
||||
if (!net.delta) return;
|
||||
for (i = 0; i < l.batch * l.inputs; ++i) {
|
||||
float r = l.rand[i];
|
||||
if(r < l.probability) net.delta[i] = 0;
|
||||
if (r < l.probability) net.delta[i] = 0;
|
||||
else net.delta[i] *= l.scale;
|
||||
}
|
||||
}
|
|
@ -9,7 +9,9 @@ typedef layer dropout_layer;
|
|||
dropout_layer make_dropout_layer(int batch, int inputs, float probability);
|
||||
|
||||
void forward_dropout_layer(dropout_layer l, network net);
|
||||
|
||||
void backward_dropout_layer(dropout_layer l, network net);
|
||||
|
||||
void resize_dropout_layer(dropout_layer *l, int inputs);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -1,23 +1,22 @@
|
|||
#include "cuda_runtime.h"
|
||||
#include "curand.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
extern "C" {
|
||||
#include "dropout_layer.h"
|
||||
#include "cuda.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) {
|
||||
int id = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if (id < size) input[id] = (rand[id] < prob) ? 0 : input[id] * scale;
|
||||
}
|
||||
|
||||
__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale)
|
||||
{
|
||||
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
|
||||
if(id < size) input[id] = (rand[id] < prob) ? 0 : input[id]*scale;
|
||||
}
|
||||
|
||||
void forward_dropout_layer_gpu(dropout_layer layer, network net)
|
||||
{
|
||||
void forward_dropout_layer_gpu(dropout_layer layer, network net) {
|
||||
if (!net.train) return;
|
||||
int size = layer.inputs*layer.batch;
|
||||
int size = layer.inputs * layer.batch;
|
||||
cuda_random(layer.rand_gpu, size);
|
||||
/*
|
||||
int i;
|
||||
|
@ -27,15 +26,16 @@ void forward_dropout_layer_gpu(dropout_layer layer, network net)
|
|||
cuda_push_array(layer.rand_gpu, layer.rand, size);
|
||||
*/
|
||||
|
||||
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale);
|
||||
check_error(cudaPeekAtLastError());
|
||||
yoloswag420blazeit360noscope<<<cuda_gridsize(
|
||||
size), BLOCK>>>(net.input_gpu, size, layer.rand_gpu, layer.probability, layer.scale);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
||||
void backward_dropout_layer_gpu(dropout_layer layer, network net)
|
||||
{
|
||||
if(!net.delta_gpu) return;
|
||||
int size = layer.inputs*layer.batch;
|
||||
void backward_dropout_layer_gpu(dropout_layer layer, network net) {
|
||||
if (!net.delta_gpu) return;
|
||||
int size = layer.inputs * layer.batch;
|
||||
|
||||
yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale);
|
||||
check_error(cudaPeekAtLastError());
|
||||
yoloswag420blazeit360noscope<<<cuda_gridsize(
|
||||
size), BLOCK>>>(net.delta_gpu, size, layer.rand_gpu, layer.probability, layer.scale);
|
||||
check_error(hipPeekAtLastError());
|
||||
}
|
||||
|
|
324
src/gemm.c
324
src/gemm.c
|
@ -1,324 +0,0 @@
|
|||
#include "gemm.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
void gemm_bin(int M, int N, int K, float ALPHA,
|
||||
char *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc)
|
||||
{
|
||||
int i,j,k;
|
||||
for(i = 0; i < M; ++i){
|
||||
for(k = 0; k < K; ++k){
|
||||
char A_PART = A[i*lda+k];
|
||||
if(A_PART){
|
||||
for(j = 0; j < N; ++j){
|
||||
C[i*ldc+j] += B[k*ldb+j];
|
||||
}
|
||||
} else {
|
||||
for(j = 0; j < N; ++j){
|
||||
C[i*ldc+j] -= B[k*ldb+j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float *random_matrix(int rows, int cols)
|
||||
{
|
||||
int i;
|
||||
float *m = calloc(rows*cols, sizeof(float));
|
||||
for(i = 0; i < rows*cols; ++i){
|
||||
m[i] = (float)rand()/RAND_MAX;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
void time_random_matrix(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<10; ++i){
|
||||
gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
}
|
||||
end = clock();
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
|
||||
void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float BETA,
|
||||
float *C, int ldc)
|
||||
{
|
||||
gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc);
|
||||
}
|
||||
|
||||
void gemm_nn(int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc)
|
||||
{
|
||||
int i,j,k;
|
||||
#pragma omp parallel for
|
||||
for(i = 0; i < M; ++i){
|
||||
for(k = 0; k < K; ++k){
|
||||
register float A_PART = ALPHA*A[i*lda+k];
|
||||
for(j = 0; j < N; ++j){
|
||||
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gemm_nt(int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc)
|
||||
{
|
||||
int i,j,k;
|
||||
#pragma omp parallel for
|
||||
for(i = 0; i < M; ++i){
|
||||
for(j = 0; j < N; ++j){
|
||||
register float sum = 0;
|
||||
for(k = 0; k < K; ++k){
|
||||
sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
|
||||
}
|
||||
C[i*ldc+j] += sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gemm_tn(int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc)
|
||||
{
|
||||
int i,j,k;
|
||||
#pragma omp parallel for
|
||||
for(i = 0; i < M; ++i){
|
||||
for(k = 0; k < K; ++k){
|
||||
register float A_PART = ALPHA*A[k*lda+i];
|
||||
for(j = 0; j < N; ++j){
|
||||
C[i*ldc+j] += A_PART*B[k*ldb+j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gemm_tt(int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc)
|
||||
{
|
||||
int i,j,k;
|
||||
#pragma omp parallel for
|
||||
for(i = 0; i < M; ++i){
|
||||
for(j = 0; j < N; ++j){
|
||||
register float sum = 0;
|
||||
for(k = 0; k < K; ++k){
|
||||
sum += ALPHA*A[i+k*lda]*B[k+j*ldb];
|
||||
}
|
||||
C[i*ldc+j] += sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float BETA,
|
||||
float *C, int ldc)
|
||||
{
|
||||
//printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc);
|
||||
int i, j;
|
||||
for(i = 0; i < M; ++i){
|
||||
for(j = 0; j < N; ++j){
|
||||
C[i*ldc + j] *= BETA;
|
||||
}
|
||||
}
|
||||
if(!TA && !TB)
|
||||
gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
|
||||
else if(TA && !TB)
|
||||
gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
|
||||
else if(!TA && TB)
|
||||
gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
|
||||
else
|
||||
gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include <math.h>
|
||||
|
||||
void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A_gpu, int lda,
|
||||
float *B_gpu, int ldb,
|
||||
float BETA,
|
||||
float *C_gpu, int ldc)
|
||||
{
|
||||
cublasHandle_t handle = blas_handle();
|
||||
cudaError_t status = cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N),
|
||||
(TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<32; ++i){
|
||||
gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
}
|
||||
end = clock();
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
void time_gpu(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
int iter = 10;
|
||||
float *a = random_matrix(m,k);
|
||||
float *b = random_matrix(k,n);
|
||||
|
||||
int lda = (!TA)?k:m;
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
|
||||
float *a_cl = cuda_make_array(a, m*k);
|
||||
float *b_cl = cuda_make_array(b, k*n);
|
||||
float *c_cl = cuda_make_array(c, m*n);
|
||||
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<iter; ++i){
|
||||
gemm_gpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
|
||||
cudaThreadSynchronize();
|
||||
}
|
||||
double flop = ((double)m)*n*(2.*k + 2.)*iter;
|
||||
double gflop = flop/pow(10., 9);
|
||||
end = clock();
|
||||
double seconds = sec(end-start);
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
|
||||
cuda_free(a_cl);
|
||||
cuda_free(b_cl);
|
||||
cuda_free(c_cl);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
|
||||
void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
srand(0);
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
float *c_gpu = random_matrix(m,n);
|
||||
memset(c, 0, m*n*sizeof(float));
|
||||
memset(c_gpu, 0, m*n*sizeof(float));
|
||||
int i;
|
||||
//pm(m,k,b);
|
||||
gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n);
|
||||
//printf("GPU\n");
|
||||
//pm(m, n, c_gpu);
|
||||
|
||||
gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
//printf("\n\nCPU\n");
|
||||
//pm(m, n, c);
|
||||
double sse = 0;
|
||||
for(i = 0; i < m*n; ++i) {
|
||||
//printf("%f %f\n", c[i], c_gpu[i]);
|
||||
sse += pow(c[i]-c_gpu[i], 2);
|
||||
}
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g SSE\n",m,k,k,n, TA, TB, sse/(m*n));
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
free(c_gpu);
|
||||
}
|
||||
|
||||
int test_gpu_blas()
|
||||
{
|
||||
/*
|
||||
test_gpu_accuracy(0,0,10,576,75);
|
||||
|
||||
test_gpu_accuracy(0,0,17,10,10);
|
||||
test_gpu_accuracy(1,0,17,10,10);
|
||||
test_gpu_accuracy(0,1,17,10,10);
|
||||
test_gpu_accuracy(1,1,17,10,10);
|
||||
|
||||
test_gpu_accuracy(0,0,1000,10,100);
|
||||
test_gpu_accuracy(1,0,1000,10,100);
|
||||
test_gpu_accuracy(0,1,1000,10,100);
|
||||
test_gpu_accuracy(1,1,1000,10,100);
|
||||
|
||||
test_gpu_accuracy(0,0,10,10,10);
|
||||
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,192,729,1600);
|
||||
time_gpu(0,0,384,196,1728);
|
||||
time_gpu(0,0,256,196,3456);
|
||||
time_gpu(0,0,256,196,2304);
|
||||
time_gpu(0,0,128,4096,12544);
|
||||
time_gpu(0,0,128,4096,4096);
|
||||
*/
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,576,12544);
|
||||
time_gpu(0,0,256,2304,784);
|
||||
time_gpu(1,1,2304,256,784);
|
||||
time_gpu(0,0,512,4608,196);
|
||||
time_gpu(1,1,4608,512,196);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,321 @@
|
|||
#include "gemm.h"
|
||||
#include "utils.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
void gemm_bin(int M, int N, int K, float ALPHA,
|
||||
char *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc) {
|
||||
int i, j, k;
|
||||
for (i = 0; i < M; ++i) {
|
||||
for (k = 0; k < K; ++k) {
|
||||
char A_PART = A[i * lda + k];
|
||||
if (A_PART) {
|
||||
for (j = 0; j < N; ++j) {
|
||||
C[i * ldc + j] += B[k * ldb + j];
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < N; ++j) {
|
||||
C[i * ldc + j] -= B[k * ldb + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float *random_matrix(int rows, int cols) {
|
||||
int i;
|
||||
float *m = (float*)calloc(rows * cols, sizeof(float));
|
||||
for (i = 0; i < rows * cols; ++i) {
|
||||
m[i] = (float) rand() / RAND_MAX;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
void time_random_matrix(int TA, int TB, int m, int k, int n) {
|
||||
float *a;
|
||||
if (!TA) a = random_matrix(m, k);
|
||||
else a = random_matrix(k, m);
|
||||
int lda = (!TA) ? k : m;
|
||||
float *b;
|
||||
if (!TB) b = random_matrix(k, n);
|
||||
else b = random_matrix(n, k);
|
||||
int ldb = (!TB) ? n : k;
|
||||
|
||||
float *c = random_matrix(m, n);
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for (i = 0; i < 10; ++i) {
|
||||
gemm_cpu(TA, TB, m, n, k, 1, a, lda, b, ldb, 1, c, n);
|
||||
}
|
||||
end = clock();
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n", m, k, k, n, TA, TB,
|
||||
(float) (end - start) / CLOCKS_PER_SEC);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
|
||||
/* Public GEMM entry point: C = ALPHA * op(A) * op(B) + BETA * C, where
   op(X) is X or X^T depending on the TA/TB flags. Delegates directly to
   the CPU implementation. */
void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc) {
    gemm_cpu(TA, TB, M, N, K,
             ALPHA, A, lda,
             B, ldb,
             BETA, C, ldc);
}
|
||||
|
||||
/* C += ALPHA * A * B for row-major matrices, no transposes.
   A is M x K (leading dim lda), B is K x N (ldb), C is M x N (ldc).
   Bug fix: j and k were declared at function scope, so under
   `#pragma omp parallel for` they were SHARED across threads — a data
   race when built with -fopenmp. Declaring them inside the loops makes
   them thread-private. `register` is also dropped (deprecated, no-op on
   modern compilers). */
void gemm_nn(int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float *C, int ldc) {
    int i;
    #pragma omp parallel for
    for (i = 0; i < M; ++i) {
        for (int k = 0; k < K; ++k) {
            /* Hoist the scaled A element; the inner loop is then a saxpy. */
            float A_PART = ALPHA * A[i * lda + k];
            for (int j = 0; j < N; ++j) {
                C[i * ldc + j] += A_PART * B[k * ldb + j];
            }
        }
    }
}
|
||||
|
||||
/* C += ALPHA * A * B^T. A is M x K (lda); B is stored N x K (ldb), so
   B^T is read by walking B's rows.
   Bug fix: j and k were declared at function scope and therefore SHARED
   under `#pragma omp parallel for` — a data race with -fopenmp. They are
   now loop-local (thread-private). `register` dropped as deprecated. */
void gemm_nt(int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float *C, int ldc) {
    int i;
    #pragma omp parallel for
    for (i = 0; i < M; ++i) {
        for (int j = 0; j < N; ++j) {
            /* Dot product of row i of A with row j of B (= column j of B^T). */
            float sum = 0;
            for (int k = 0; k < K; ++k) {
                sum += ALPHA * A[i * lda + k] * B[j * ldb + k];
            }
            C[i * ldc + j] += sum;
        }
    }
}
|
||||
|
||||
/* C += ALPHA * A^T * B. A is stored K x M (lda), so A^T is read by
   indexing A[k * lda + i]; B is K x N (ldb).
   Bug fix: j and k were declared at function scope and therefore SHARED
   under `#pragma omp parallel for` — a data race with -fopenmp. They are
   now loop-local (thread-private). `register` dropped as deprecated. */
void gemm_tn(int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float *C, int ldc) {
    int i;
    #pragma omp parallel for
    for (i = 0; i < M; ++i) {
        for (int k = 0; k < K; ++k) {
            /* Hoist the scaled transposed-A element for the inner saxpy. */
            float A_PART = ALPHA * A[k * lda + i];
            for (int j = 0; j < N; ++j) {
                C[i * ldc + j] += A_PART * B[k * ldb + j];
            }
        }
    }
}
|
||||
|
||||
/* C += ALPHA * A^T * B^T. A is stored K x M (lda) and B is stored
   N x K (ldb); both operands are read transposed.
   Bug fix: j and k were declared at function scope and therefore SHARED
   under `#pragma omp parallel for` — a data race with -fopenmp. They are
   now loop-local (thread-private). `register` dropped as deprecated. */
void gemm_tt(int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float *C, int ldc) {
    int i;
    #pragma omp parallel for
    for (i = 0; i < M; ++i) {
        for (int j = 0; j < N; ++j) {
            /* Dot product of column i of A with row j of B. */
            float sum = 0;
            for (int k = 0; k < K; ++k) {
                sum += ALPHA * A[i + k * lda] * B[k + j * ldb];
            }
            C[i * ldc + j] += sum;
        }
    }
}
|
||||
|
||||
|
||||
/* CPU GEMM: C = ALPHA * op(A) * op(B) + BETA * C. op(X) is X or X^T
   depending on the TA/TB flags; dispatches to the matching kernel. */
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc) {
    /* Scale the destination first so every kernel can simply accumulate. */
    for (int row = 0; row < M; ++row) {
        for (int col = 0; col < N; ++col) {
            C[row * ldc + col] *= BETA;
        }
    }
    if (TA) {
        if (TB) gemm_tt(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
        else    gemm_tn(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
    } else {
        if (TB) gemm_nt(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
        else    gemm_nn(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
    }
}
|
||||
|
||||
#include "gemm.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
/* GPU GEMM via hipBLAS: C = ALPHA * op(A) * op(B) + BETA * C.
   hipblasSgemm uses column-major storage, so the row-major product is
   obtained by swapping the operands and dimensions (B first, N/M swapped,
   and the transpose flags exchanged accordingly).
   All three matrix pointers must point to device memory. */
void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A_gpu, int lda,
        float *B_gpu, int ldb,
        float BETA,
        float *C_gpu, int ldc)
{
    hipblasHandle_t handle = blas_handle();
    hipblasStatus_t status = hipblasSgemm(handle, (TB ? HIPBLAS_OP_T : HIPBLAS_OP_N),
            (TA ? HIPBLAS_OP_T : HIPBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
    /* NOTE(review): status is computed but never checked; the check below was
       disabled, presumably because check_error expects a hipError_t rather
       than a hipblasStatus_t — confirm and restore error handling. */
    // check_error(status);
}
|
||||
|
||||
|
||||
|
||||
/* Benchmark gemm_gpu: run 32 (m x k) * (k x n) products on random data
   (transpose flags TA/TB select operand layout) and print the elapsed
   CPU time in seconds.
   NOTE(review): a, b and c are host allocations from random_matrix, yet
   gemm_gpu expects device pointers — verify this path is still used, or
   whether time_gpu (which uploads via cuda_make_array) superseded it. */
void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
{
    float *a;
    if(!TA) a = random_matrix(m,k);
    else a = random_matrix(k,m);
    int lda = (!TA)?k:m;
    float *b;
    if(!TB) b = random_matrix(k,n);
    else b = random_matrix(n,k);
    int ldb = (!TB)?n:k;

    float *c = random_matrix(m,n);
    int i;
    clock_t start = clock(), end;
    for(i = 0; i<32; ++i){
        gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
    }
    end = clock();
    /* clock() measures CPU time, not GPU wall time; there is no device
       synchronization here, so this mostly times kernel launches. */
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);
    free(a);
    free(b);
    free(c);
}
|
||||
|
||||
/* Benchmark gemm_gpu with device-resident operands: upload random host
   matrices, run `iter` multiplications with a device sync after each,
   and print elapsed seconds plus achieved GFLOPS. */
void time_gpu(int TA, int TB, int m, int k, int n)
{
    int iter = 10;
    float *a = random_matrix(m,k);
    float *b = random_matrix(k,n);

    int lda = (!TA)?k:m;
    int ldb = (!TB)?n:k;

    float *c = random_matrix(m,n);

    /* Upload host buffers to the device; *_cl are device pointers. */
    float *a_cl = cuda_make_array(a, m*k);
    float *b_cl = cuda_make_array(b, k*n);
    float *c_cl = cuda_make_array(c, m*n);

    int i;
    clock_t start = clock(), end;
    for(i = 0; i<iter; ++i){
        gemm_gpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
        /* Synchronize so clock() captures the kernel's real runtime. */
        hipDeviceSynchronize();
    }
    /* FLOP count: ~2*k multiply-adds per output element, times iterations.
       NOTE(review): the "+ 2." per element presumably accounts for the
       BETA scale and accumulate — confirm the intended model. */
    double flop = ((double)m)*n*(2.*k + 2.)*iter;
    double gflop = flop/pow(10., 9);
    end = clock();
    double seconds = sec(end-start);
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
    cuda_free(a_cl);
    cuda_free(b_cl);
    cuda_free(c_cl);
    free(a);
    free(b);
    free(c);
}
|
||||
|
||||
|
||||
/* Compare gemm_gpu against gemm_cpu on identical random inputs and print
   the mean squared difference (labeled SSE, but divided by m*n).
   srand(0) makes the comparison reproducible across runs.
   NOTE(review): a and b are host allocations passed straight to gemm_gpu,
   which expects device pointers — verify how this test is expected to run. */
void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
{
    srand(0);
    float *a;
    if(!TA) a = random_matrix(m,k);
    else a = random_matrix(k,m);
    int lda = (!TA)?k:m;
    float *b;
    if(!TB) b = random_matrix(k,n);
    else b = random_matrix(n,k);
    int ldb = (!TB)?n:k;

    float *c = random_matrix(m,n);
    float *c_gpu = random_matrix(m,n);
    /* Zero both accumulators so GPU and CPU start from the same C. */
    memset(c, 0, m*n*sizeof(float));
    memset(c_gpu, 0, m*n*sizeof(float));
    int i;
    //pm(m,k,b);
    gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n);
    //printf("GPU\n");
    //pm(m, n, c_gpu);

    gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
    //printf("\n\nCPU\n");
    //pm(m, n, c);
    /* Accumulate squared elementwise differences between the two results. */
    double sse = 0;
    for(i = 0; i < m*n; ++i) {
        //printf("%f %f\n", c[i], c_gpu[i]);
        sse += pow(c[i]-c_gpu[i], 2);
    }
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g SSE\n",m,k,k,n, TA, TB, sse/(m*n));
    free(a);
    free(b);
    free(c);
    free(c_gpu);
}
|
||||
|
||||
/* Smoke-test / benchmark driver for the GPU BLAS path: times gemm_gpu on
   a set of convolution-shaped matrix sizes. The accuracy checks and the
   older timing set are kept commented out for manual re-enabling.
   Always returns 0. */
int test_gpu_blas()
{
    /*
    test_gpu_accuracy(0,0,10,576,75);

    test_gpu_accuracy(0,0,17,10,10);
    test_gpu_accuracy(1,0,17,10,10);
    test_gpu_accuracy(0,1,17,10,10);
    test_gpu_accuracy(1,1,17,10,10);

    test_gpu_accuracy(0,0,1000,10,100);
    test_gpu_accuracy(1,0,1000,10,100);
    test_gpu_accuracy(0,1,1000,10,100);
    test_gpu_accuracy(1,1,1000,10,100);

    test_gpu_accuracy(0,0,10,10,10);

    time_gpu(0,0,64,2916,363);
    time_gpu(0,0,64,2916,363);
    time_gpu(0,0,64,2916,363);
    time_gpu(0,0,192,729,1600);
    time_gpu(0,0,384,196,1728);
    time_gpu(0,0,256,196,3456);
    time_gpu(0,0,256,196,2304);
    time_gpu(0,0,128,4096,12544);
    time_gpu(0,0,128,4096,4096);
    */
    /* The first size is repeated to warm up the device before timing. */
    time_gpu(0,0,64,75,12544);
    time_gpu(0,0,64,75,12544);
    time_gpu(0,0,64,75,12544);
    time_gpu(0,0,64,576,12544);
    time_gpu(0,0,256,2304,784);
    time_gpu(1,1,2304,256,784);
    time_gpu(0,0,512,4608,196);
    time_gpu(1,1,4608,512,196);

    return 0;
}
|
||||
#endif
|
||||
|
30
src/gemm.h
30
src/gemm.h
|
@ -1,22 +1,22 @@
|
|||
#ifndef GEMM_H
|
||||
#define GEMM_H
|
||||
|
||||
void gemm_bin(int M, int N, int K, float ALPHA,
|
||||
char *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc);
|
||||
|
||||
void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float BETA,
|
||||
float *C, int ldc);
|
||||
void gemm_bin(int M, int N, int K, float ALPHA,
|
||||
char *A, int lda,
|
||||
float *B, int ldb,
|
||||
float *C, int ldc);
|
||||
|
||||
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float BETA,
|
||||
float *C, int ldc);
|
||||
void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float BETA,
|
||||
float *C, int ldc);
|
||||
|
||||
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A, int lda,
|
||||
float *B, int ldb,
|
||||
float BETA,
|
||||
float *C, int ldc);
|
||||
|
||||
#ifdef GPU
|
||||
void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
#include "gemm.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A_gpu, int lda,
|
||||
float *B_gpu, int ldb,
|
||||
float BETA,
|
||||
float *C_gpu, int ldc)
|
||||
{
|
||||
hipblasHandle_t handle = blas_handle();
|
||||
hipError_t status = hipblasSgemm(handle, (TB ? HIPBLAS_OP_T : HIPBLAS_OP_N),
|
||||
(TA ? HIPBLAS_OP_T : HIPBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<32; ++i){
|
||||
gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
}
|
||||
end = clock();
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
void time_gpu(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
int iter = 10;
|
||||
float *a = random_matrix(m,k);
|
||||
float *b = random_matrix(k,n);
|
||||
|
||||
int lda = (!TA)?k:m;
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
|
||||
float *a_cl = cuda_make_array(a, m*k);
|
||||
float *b_cl = cuda_make_array(b, k*n);
|
||||
float *c_cl = cuda_make_array(c, m*n);
|
||||
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<iter; ++i){
|
||||
gemm_gpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
double flop = ((double)m)*n*(2.*k + 2.)*iter;
|
||||
double gflop = flop/pow(10., 9);
|
||||
end = clock();
|
||||
double seconds = sec(end-start);
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
|
||||
cuda_free(a_cl);
|
||||
cuda_free(b_cl);
|
||||
cuda_free(c_cl);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
|
||||
void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
srand(0);
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
float *c_gpu = random_matrix(m,n);
|
||||
memset(c, 0, m*n*sizeof(float));
|
||||
memset(c_gpu, 0, m*n*sizeof(float));
|
||||
int i;
|
||||
//pm(m,k,b);
|
||||
gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n);
|
||||
//printf("GPU\n");
|
||||
//pm(m, n, c_gpu);
|
||||
|
||||
gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
//printf("\n\nCPU\n");
|
||||
//pm(m, n, c);
|
||||
double sse = 0;
|
||||
for(i = 0; i < m*n; ++i) {
|
||||
//printf("%f %f\n", c[i], c_gpu[i]);
|
||||
sse += pow(c[i]-c_gpu[i], 2);
|
||||
}
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g SSE\n",m,k,k,n, TA, TB, sse/(m*n));
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
free(c_gpu);
|
||||
}
|
||||
|
||||
int test_gpu_blas()
|
||||
{
|
||||
/*
|
||||
test_gpu_accuracy(0,0,10,576,75);
|
||||
|
||||
test_gpu_accuracy(0,0,17,10,10);
|
||||
test_gpu_accuracy(1,0,17,10,10);
|
||||
test_gpu_accuracy(0,1,17,10,10);
|
||||
test_gpu_accuracy(1,1,17,10,10);
|
||||
|
||||
test_gpu_accuracy(0,0,1000,10,100);
|
||||
test_gpu_accuracy(1,0,1000,10,100);
|
||||
test_gpu_accuracy(0,1,1000,10,100);
|
||||
test_gpu_accuracy(1,1,1000,10,100);
|
||||
|
||||
test_gpu_accuracy(0,0,10,10,10);
|
||||
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,192,729,1600);
|
||||
time_gpu(0,0,384,196,1728);
|
||||
time_gpu(0,0,256,196,3456);
|
||||
time_gpu(0,0,256,196,2304);
|
||||
time_gpu(0,0,128,4096,12544);
|
||||
time_gpu(0,0,128,4096,4096);
|
||||
*/
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,576,12544);
|
||||
time_gpu(0,0,256,2304,784);
|
||||
time_gpu(1,1,2304,256,784);
|
||||
time_gpu(0,0,512,4608,196);
|
||||
time_gpu(1,1,4608,512,196);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
#include "gemm.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef GPU
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hiprand.h"
|
||||
#include "hipblas.h"
|
||||
|
||||
void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||
float *A_gpu, int lda,
|
||||
float *B_gpu, int ldb,
|
||||
float BETA,
|
||||
float *C_gpu, int ldc)
|
||||
{
|
||||
hipblasHandle_t handle = blas_handle();
|
||||
hipError_t status = hipblasSgemm(handle, (TB ? HIPBLAS_OP_T : HIPBLAS_OP_N),
|
||||
(TA ? HIPBLAS_OP_T : HIPBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc);
|
||||
check_error(status);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<32; ++i){
|
||||
gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
}
|
||||
end = clock();
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
void time_gpu(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
int iter = 10;
|
||||
float *a = random_matrix(m,k);
|
||||
float *b = random_matrix(k,n);
|
||||
|
||||
int lda = (!TA)?k:m;
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
|
||||
float *a_cl = cuda_make_array(a, m*k);
|
||||
float *b_cl = cuda_make_array(b, k*n);
|
||||
float *c_cl = cuda_make_array(c, m*n);
|
||||
|
||||
int i;
|
||||
clock_t start = clock(), end;
|
||||
for(i = 0; i<iter; ++i){
|
||||
gemm_gpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
double flop = ((double)m)*n*(2.*k + 2.)*iter;
|
||||
double gflop = flop/pow(10., 9);
|
||||
end = clock();
|
||||
double seconds = sec(end-start);
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
|
||||
cuda_free(a_cl);
|
||||
cuda_free(b_cl);
|
||||
cuda_free(c_cl);
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
|
||||
void test_gpu_accuracy(int TA, int TB, int m, int k, int n)
|
||||
{
|
||||
srand(0);
|
||||
float *a;
|
||||
if(!TA) a = random_matrix(m,k);
|
||||
else a = random_matrix(k,m);
|
||||
int lda = (!TA)?k:m;
|
||||
float *b;
|
||||
if(!TB) b = random_matrix(k,n);
|
||||
else b = random_matrix(n,k);
|
||||
int ldb = (!TB)?n:k;
|
||||
|
||||
float *c = random_matrix(m,n);
|
||||
float *c_gpu = random_matrix(m,n);
|
||||
memset(c, 0, m*n*sizeof(float));
|
||||
memset(c_gpu, 0, m*n*sizeof(float));
|
||||
int i;
|
||||
//pm(m,k,b);
|
||||
gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n);
|
||||
//printf("GPU\n");
|
||||
//pm(m, n, c_gpu);
|
||||
|
||||
gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n);
|
||||
//printf("\n\nCPU\n");
|
||||
//pm(m, n, c);
|
||||
double sse = 0;
|
||||
for(i = 0; i < m*n; ++i) {
|
||||
//printf("%f %f\n", c[i], c_gpu[i]);
|
||||
sse += pow(c[i]-c_gpu[i], 2);
|
||||
}
|
||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g SSE\n",m,k,k,n, TA, TB, sse/(m*n));
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
free(c_gpu);
|
||||
}
|
||||
|
||||
int test_gpu_blas()
|
||||
{
|
||||
/*
|
||||
test_gpu_accuracy(0,0,10,576,75);
|
||||
|
||||
test_gpu_accuracy(0,0,17,10,10);
|
||||
test_gpu_accuracy(1,0,17,10,10);
|
||||
test_gpu_accuracy(0,1,17,10,10);
|
||||
test_gpu_accuracy(1,1,17,10,10);
|
||||
|
||||
test_gpu_accuracy(0,0,1000,10,100);
|
||||
test_gpu_accuracy(1,0,1000,10,100);
|
||||
test_gpu_accuracy(0,1,1000,10,100);
|
||||
test_gpu_accuracy(1,1,1000,10,100);
|
||||
|
||||
test_gpu_accuracy(0,0,10,10,10);
|
||||
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,64,2916,363);
|
||||
time_gpu(0,0,192,729,1600);
|
||||
time_gpu(0,0,384,196,1728);
|
||||
time_gpu(0,0,256,196,3456);
|
||||
time_gpu(0,0,256,196,2304);
|
||||
time_gpu(0,0,128,4096,12544);
|
||||
time_gpu(0,0,128,4096,4096);
|
||||
*/
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,75,12544);
|
||||
time_gpu(0,0,64,576,12544);
|
||||
time_gpu(0,0,256,2304,784);
|
||||
time_gpu(1,1,2304,256,784);
|
||||
time_gpu(0,0,512,4608,196);
|
||||
time_gpu(1,1,4608,512,196);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#include "gru_layer.h"
|
||||
#include "connected_layer.h"
|
||||
#include "utils.h"
|
||||
#include "cuda.h"
|
||||
#include "blas.h"
|
||||
#include "gemm.h"
|
||||
|
||||
|
@ -10,9 +9,12 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static void increment_layer(layer *l, int steps)
|
||||
{
|
||||
int num = l->outputs*l->batch*steps;
|
||||
#ifdef GPU
|
||||
#include "hip/hip_runtime.h"
|
||||
#endif
|
||||
|
||||
static void increment_layer(layer *l, int steps) {
|
||||
int num = l->outputs * l->batch * steps;
|
||||
l->output += num;
|
||||
l->delta += num;
|
||||
l->x += num;
|
||||
|
@ -26,62 +28,60 @@ static void increment_layer(layer *l, int steps)
|
|||
#endif
|
||||
}
|
||||
|
||||
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam)
|
||||
{
|
||||
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam) {
|
||||
fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
|
||||
batch = batch / steps;
|
||||
layer l = {0};
|
||||
layer l = {(LAYER_TYPE)0};
|
||||
l.batch = batch;
|
||||
l.type = GRU;
|
||||
l.steps = steps;
|
||||
l.inputs = inputs;
|
||||
|
||||
l.uz = malloc(sizeof(layer));
|
||||
l.uz = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.uz) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
|
||||
*(l.uz) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam);
|
||||
l.uz->batch = batch;
|
||||
|
||||
l.wz = malloc(sizeof(layer));
|
||||
l.wz = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.wz) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
|
||||
*(l.wz) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam);
|
||||
l.wz->batch = batch;
|
||||
|
||||
l.ur = malloc(sizeof(layer));
|
||||
l.ur = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.ur) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
|
||||
*(l.ur) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam);
|
||||
l.ur->batch = batch;
|
||||
|
||||
l.wr = malloc(sizeof(layer));
|
||||
l.wr = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.wr) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
|
||||
*(l.wr) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam);
|
||||
l.wr->batch = batch;
|
||||
|
||||
|
||||
|
||||
l.uh = malloc(sizeof(layer));
|
||||
l.uh = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.uh) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize, adam);
|
||||
*(l.uh) = make_connected_layer(batch * steps, inputs, outputs, LINEAR, batch_normalize, adam);
|
||||
l.uh->batch = batch;
|
||||
|
||||
l.wh = malloc(sizeof(layer));
|
||||
l.wh = (layer *) malloc(sizeof(layer));
|
||||
fprintf(stderr, "\t\t");
|
||||
*(l.wh) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize, adam);
|
||||
*(l.wh) = make_connected_layer(batch * steps, outputs, outputs, LINEAR, batch_normalize, adam);
|
||||
l.wh->batch = batch;
|
||||
|
||||
l.batch_normalize = batch_normalize;
|
||||
|
||||
|
||||
l.outputs = outputs;
|
||||
l.output = calloc(outputs*batch*steps, sizeof(float));
|
||||
l.delta = calloc(outputs*batch*steps, sizeof(float));
|
||||
l.state = calloc(outputs*batch, sizeof(float));
|
||||
l.prev_state = calloc(outputs*batch, sizeof(float));
|
||||
l.forgot_state = calloc(outputs*batch, sizeof(float));
|
||||
l.forgot_delta = calloc(outputs*batch, sizeof(float));
|
||||
l.output = (float*) calloc(outputs * batch * steps, sizeof(float));
|
||||
l.delta = (float*) calloc(outputs * batch * steps, sizeof(float));
|
||||
l.state = (float*) calloc(outputs * batch, sizeof(float));
|
||||
l.prev_state = (float*) calloc(outputs * batch, sizeof(float));
|
||||
l.forgot_state = (float*) calloc(outputs * batch, sizeof(float));
|
||||
l.forgot_delta = (float*) calloc(outputs * batch, sizeof(float));
|
||||
|
||||
l.r_cpu = calloc(outputs*batch, sizeof(float));
|
||||
l.z_cpu = calloc(outputs*batch, sizeof(float));
|
||||
l.h_cpu = calloc(outputs*batch, sizeof(float));
|
||||
l.r_cpu = (float*) calloc(outputs * batch, sizeof(float));
|
||||
l.z_cpu = (float*) calloc(outputs * batch, sizeof(float));
|
||||
l.h_cpu = (float*) calloc(outputs * batch, sizeof(float));
|
||||
|
||||
l.forward = forward_gru_layer;
|
||||
l.backward = backward_gru_layer;
|
||||
|
@ -103,20 +103,19 @@ layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_no
|
|||
l.h_gpu = cuda_make_array(0, batch*outputs);
|
||||
|
||||
#ifdef CUDNN
|
||||
cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w);
|
||||
cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.uz->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.uh->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.ur->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.wz->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.wh->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w);
|
||||
hipdnnSetTensor4dDescriptor(l.wr->dstTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
void update_gru_layer(layer l, update_args a)
|
||||
{
|
||||
void update_gru_layer(layer l, update_args a) {
|
||||
update_connected_layer(*(l.ur), a);
|
||||
update_connected_layer(*(l.uz), a);
|
||||
update_connected_layer(*(l.uh), a);
|
||||
|
@ -125,8 +124,7 @@ void update_gru_layer(layer l, update_args a)
|
|||
update_connected_layer(*(l.wh), a);
|
||||
}
|
||||
|
||||
void forward_gru_layer(layer l, network net)
|
||||
{
|
||||
void forward_gru_layer(layer l, network net) {
|
||||
network s = net;
|
||||
s.train = net.train;
|
||||
int i;
|
||||
|
@ -145,9 +143,9 @@ void forward_gru_layer(layer l, network net)
|
|||
fill_cpu(l.outputs * l.batch * l.steps, 0, wz.delta, 1);
|
||||
fill_cpu(l.outputs * l.batch * l.steps, 0, wr.delta, 1);
|
||||
fill_cpu(l.outputs * l.batch * l.steps, 0, wh.delta, 1);
|
||||
if(net.train) {
|
||||
if (net.train) {
|
||||
fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
|
||||
copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1);
|
||||
copy_cpu(l.outputs * l.batch, l.state, 1, l.prev_state, 1);
|
||||
}
|
||||
|
||||
for (i = 0; i < l.steps; ++i) {
|
||||
|
@ -161,36 +159,36 @@ void forward_gru_layer(layer l, network net)
|
|||
forward_connected_layer(uh, s);
|
||||
|
||||
|
||||
copy_cpu(l.outputs*l.batch, uz.output, 1, l.z_cpu, 1);
|
||||
axpy_cpu(l.outputs*l.batch, 1, wz.output, 1, l.z_cpu, 1);
|
||||
copy_cpu(l.outputs * l.batch, uz.output, 1, l.z_cpu, 1);
|
||||
axpy_cpu(l.outputs * l.batch, 1, wz.output, 1, l.z_cpu, 1);
|
||||
|
||||
copy_cpu(l.outputs*l.batch, ur.output, 1, l.r_cpu, 1);
|
||||
axpy_cpu(l.outputs*l.batch, 1, wr.output, 1, l.r_cpu, 1);
|
||||
copy_cpu(l.outputs * l.batch, ur.output, 1, l.r_cpu, 1);
|
||||
axpy_cpu(l.outputs * l.batch, 1, wr.output, 1, l.r_cpu, 1);
|
||||
|
||||
activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC);
|
||||
activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC);
|
||||
activate_array(l.z_cpu, l.outputs * l.batch, LOGISTIC);
|
||||
activate_array(l.r_cpu, l.outputs * l.batch, LOGISTIC);
|
||||
|
||||
copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1);
|
||||
mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1);
|
||||
copy_cpu(l.outputs * l.batch, l.state, 1, l.forgot_state, 1);
|
||||
mul_cpu(l.outputs * l.batch, l.r_cpu, 1, l.forgot_state, 1);
|
||||
|
||||
s.input = l.forgot_state;
|
||||
forward_connected_layer(wh, s);
|
||||
|
||||
copy_cpu(l.outputs*l.batch, uh.output, 1, l.h_cpu, 1);
|
||||
axpy_cpu(l.outputs*l.batch, 1, wh.output, 1, l.h_cpu, 1);
|
||||
copy_cpu(l.outputs * l.batch, uh.output, 1, l.h_cpu, 1);
|
||||
axpy_cpu(l.outputs * l.batch, 1, wh.output, 1, l.h_cpu, 1);
|
||||
|
||||
if(l.tanh){
|
||||
activate_array(l.h_cpu, l.outputs*l.batch, TANH);
|
||||
if (l.tanh) {
|
||||
activate_array(l.h_cpu, l.outputs * l.batch, TANH);
|
||||
} else {
|
||||
activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC);
|
||||
activate_array(l.h_cpu, l.outputs * l.batch, LOGISTIC);
|
||||
}
|
||||
|
||||
weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output);
|
||||
weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs * l.batch, l.output);
|
||||
|
||||
copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1);
|
||||
copy_cpu(l.outputs * l.batch, l.output, 1, l.state, 1);
|
||||
|
||||
net.input += l.inputs*l.batch;
|
||||
l.output += l.outputs*l.batch;
|
||||
net.input += l.inputs * l.batch;
|
||||
l.output += l.outputs * l.batch;
|
||||
increment_layer(&uz, 1);
|
||||
increment_layer(&ur, 1);
|
||||
increment_layer(&uh, 1);
|
||||
|
@ -201,8 +199,7 @@ void forward_gru_layer(layer l, network net)
|
|||
}
|
||||
}
|
||||
|
||||
void backward_gru_layer(layer l, network net)
|
||||
{
|
||||
void backward_gru_layer(layer l, network net) {
|
||||
}
|
||||
|
||||
#ifdef GPU
|
|
@ -9,7 +9,9 @@
|
|||
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam);
|
||||
|
||||
void forward_gru_layer(layer l, network state);
|
||||
|
||||
void backward_gru_layer(layer l, network state);
|
||||
|
||||
void update_gru_layer(layer l, update_args a);
|
||||
|
||||
#ifdef GPU
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue